From 6c9fd4b5324a23263d0059846d7be2e2cc1d6880 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Sun, 12 Mar 2023 15:36:41 +0100
Subject: [PATCH 01/20] ablation study initial add

---
 recommerce/rl/ablation_study.py | 52 +++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 recommerce/rl/ablation_study.py

diff --git a/recommerce/rl/ablation_study.py b/recommerce/rl/ablation_study.py
new file mode 100644
index 00000000..1ba8ffe0
--- /dev/null
+++ b/recommerce/rl/ablation_study.py
@@ -0,0 +1,52 @@
+# This is the script describing the ablation study for the paper.
+# It does not contain new framework features, but it stays in the repo to keep the experiments reproducible.
+
+import time
+from multiprocessing import Pipe, Process
+
+from recommerce.configuration.hyperparameter_config import HyperparameterConfigLoader
+from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly
+from recommerce.rl.stable_baselines.sb_ppo import StableBaselinesPPO
+
+
+def run_training_session(market_class, config_market, agent_class, config_rl, training_steps, number, pipe_to_parent):
+    agent = agent_class(config_market, config_rl, market_class(config_market, support_continuous_action_space=True), name=f'Train{number}')
+    watcher = agent.train_agent(training_steps)
+    pipe_to_parent.send(watcher)
+
+
+def run_group(market_configs, market_descriptions, training_steps, target_function=run_training_session):
+    market_class = CircularEconomyRebuyPriceDuopoly
+    rl_config = HyperparameterConfigLoader.load('sb_ppo_config', StableBaselinesPPO)
+    pipes = []
+    for _ in market_configs:
+        pipes.append(Pipe(False))
+    processes = [Process(target=target_function,
+                         args=(market_class, config_market, StableBaselinesPPO, rl_config, training_steps, description, pipe_entry))
+        for config_market, description, (_, pipe_entry) in zip(market_configs, market_descriptions, pipes)]
+    print('Now I start the processes')
+    for p in processes:
+        time.sleep(10)
+        p.start()
+    print('Now I wait for the results')
+    watchers = [output.recv() for output, _ in pipes]
+    print('Now I have the results')
+    for p in processes:
+        p.join()
+    print('All threads joined')
+    return market_descriptions, [watcher.get_progress_values_of_property('profits/all', 0) for watcher in watchers]
+
+
+def get_different_market_configs(parameter_name, values):
+    market_configs = []
+    descriptions = [f'{parameter_name}={value}' for value in values]
+    for value in values:
+        market_config = HyperparameterConfigLoader.load('market_config', CircularEconomyRebuyPriceDuopoly)
+        market_config[parameter_name] = value
+        market_configs.append(market_config)
+    return market_configs, descriptions
+
+
+if __name__ == '__main__':
+    results = run_group(*get_different_market_configs('max_storage', [20, 50, 100, 200]), training_steps=100000)
+    print(results)

From 2b0a00a354cf6b5a52a97ba9935989115bc73e34 Mon Sep 17 00:00:00 2001
From: Jan Groeneveld <jan.groeneveld@student.hpi.de>
Date: Tue, 14 Mar 2023 21:04:54 +0100
Subject: [PATCH 02/20] added requested exampleprinter

---
 recommerce/configuration/utils.py             |  21 +++++
 recommerce/monitoring/exampleprinter.py       |  36 ++++++--
 recommerce/rl/ablation_study.py               |  80 ++++++++++++++++--
 .../stable_baselines_model.py                 |  10 +++
 sqlite.db                                     | Bin 0 -> 20480 bytes
 tests/test_exampleprinter.py                  |   4 +-
 6 files changed, 137 insertions(+), 14 deletions(-)
 create mode 100644 sqlite.db

diff --git a/recommerce/configuration/utils.py b/recommerce/configuration/utils.py
index 210499a9..ed89945b 100644
--- a/recommerce/configuration/utils.py
+++ b/recommerce/configuration/utils.py
@@ -169,6 +169,27 @@ def unroll_dict_with_list(input_dict: dict) -> dict:
 	return newdict
 
 
+def flatten_dict(input_dict: dict) -> dict:
+	"""
+	This function takes a nested dictionary and recursively flattens it.
+
+	Args:
+		input_dict (dict): the dictionary you would like to flatten
+
+	Returns:
+		dict: the flattened dictionary
+	"""
+	newdict = {}
+	for key in input_dict:
+		if isinstance(input_dict[key], dict):
+			interim_dict = flatten_dict(input_dict[key])
+			for interim_key, value in interim_dict.items():
+				newdict[f'{key}/{interim_key}'] = value
+		else:
+			newdict[key] = input_dict[key]
+	return newdict
+
+
 def write_content_of_dict_to_overview_svg(
 		manipulator: SVGManipulator,
 		episode: int,
diff --git a/recommerce/monitoring/exampleprinter.py b/recommerce/monitoring/exampleprinter.py
index f8b5699a..827189e9 100644
--- a/recommerce/monitoring/exampleprinter.py
+++ b/recommerce/monitoring/exampleprinter.py
@@ -52,7 +52,26 @@ def _signal_handler(self, signum, frame) -> None:  # pragma: no cover
 		print('\nAborting exampleprinter run...')
 		sys.exit(0)
 
-	def run_example(self, save_lineplots=False) -> int:
+	def _rearrange_info_dicts(self, info_dicts: list, evaluation_left_bound, evaluation_right_bound) -> dict:
+		"""
+		Re-arrange the information dictionaries of the current session into a dictionary of lists.
+
+		Args:
+			info_dicts (list): The information dictionaries to re-arrange.
+
+		Returns:
+			dict: The re-arranged information dictionaries.
+		"""
+		flattened_dicts = [ut.flatten_dict(info) for info in info_dicts]
+		info_dict = {}
+		for key in flattened_dicts[0].keys():
+			info_dict[key] = []
+		for info in flattened_dicts[evaluation_left_bound:evaluation_right_bound]:
+			for key in info.keys():
+				info_dict[key].append(info[key])
+		return info_dict
+
+	def run_example(self, save_lineplots=False, evaluation_left_bound=450, evaluation_right_bound=500) -> int:
 		"""
 		Run a specified marketplace with a (pre-trained, if RL) agent and record various statistics using TensorBoard.
 
@@ -83,12 +102,15 @@ def run_example(self, save_lineplots=False) -> int:
 			in_storages = [[] for _ in range(self.marketplace._number_of_vendors)]
 		in_circulations = []
 
+		info_dicts = []
+
 		with torch.no_grad():
 			while not is_done:
 				action = self.agent.policy(state)
 				print(state)
 				print(action)
 				state, reward, is_done, logdict = self.marketplace.step(action)
+				info_dicts.append(logdict)
 				if cumulative_dict is not None:
 					cumulative_dict = ut.add_content_of_two_dicts(cumulative_dict, logdict)
 				else:
@@ -114,15 +136,17 @@ def run_example(self, save_lineplots=False) -> int:
 			svg_manipulator.to_html()
 
 		if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_lineplots:
-			self.save_step_diagrams(price_used, price_news, price_rebuy, in_storages, in_circulations, signature)
+			self.save_step_diagrams(price_used, price_news, price_rebuy, in_storages, in_circulations, signature,
+				evaluation_left_bound, evaluation_right_bound)
 
-		return our_profit
+		return our_profit, self._rearrange_info_dicts(info_dicts, evaluation_left_bound, evaluation_right_bound)
 
-	def save_step_diagrams(self, price_used, price_news, price_rebuy, in_storages, in_circulations, signature) -> None:
+	def save_step_diagrams(self, price_used, price_news, price_rebuy, in_storages, in_circulations, signature,
+			evaluation_left_bound, evaluation_right_bound) -> None:
 		x = np.array(range(1, self.config_market.episode_length + 1))
 		plt.step(x, in_circulations)
 		plt.savefig(os.path.join(PathManager.results_path, 'exampleprinter', signature, 'lineplot_in_circulations.svg'))
-		plt.xlim(450, 475)
+		plt.xlim(evaluation_left_bound, evaluation_right_bound)
 		plt.savefig(os.path.join(PathManager.results_path, 'exampleprinter', signature, 'lineplot_in_circulations_xlim.svg'), transparent=True)
 		plt.clf()
 		for data, name in [(price_used, 'price_refurbished'), (price_news, 'price_new'),
@@ -138,7 +162,7 @@ def save_step_diagrams(self, price_used, price_news, price_rebuy, in_storages, i
 			elif 'in_storage' in name:
 				plt.ylim(0, 100)
 			plt.savefig(os.path.join(PathManager.results_path, 'exampleprinter', signature, f'lineplot_{name}.svg'), transparent=True)
-			plt.xlim(450, 475)
+			plt.xlim(evaluation_left_bound, evaluation_right_bound)
 			plt.savefig(os.path.join(PathManager.results_path, 'exampleprinter', signature, f'lineplot_{name}_xlim.svg'), transparent=True)
 			plt.clf()
 
diff --git a/recommerce/rl/ablation_study.py b/recommerce/rl/ablation_study.py
index 1ba8ffe0..5dfada56 100644
--- a/recommerce/rl/ablation_study.py
+++ b/recommerce/rl/ablation_study.py
@@ -1,18 +1,85 @@
 # This is the script describing the ablation study for the paper.
 # It does not contain new framework features, but it stays in the repo to keep the experiments reproducible.
 
+import os
 import time
 from multiprocessing import Pipe, Process
 
+import numpy as np
+import pandas as pd
+
 from recommerce.configuration.hyperparameter_config import HyperparameterConfigLoader
+from recommerce.configuration.path_manager import PathManager
 from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly
+from recommerce.monitoring.exampleprinter import ExamplePrinter
 from recommerce.rl.stable_baselines.sb_ppo import StableBaselinesPPO
 
 
+def create_relevant_dataframe(descriptions, info_sequences_list):
+    parameters = [
+        ('profit', lambda info_sequence: np.mean(info_sequence['profits/all/vendor_0'])),
+        ('new sales', lambda info_sequence: np.mean(info_sequence['customer/purchases_new/vendor_0'])),
+        ('refurbished sales', lambda info_sequence: np.mean(info_sequence['customer/purchases_refurbished/vendor_0'])),
+        ('rebuys', lambda info_sequence: np.mean(info_sequence['owner/rebuys/vendor_0'])),
+        ('offer price new', lambda info_sequence: np.mean(info_sequence['actions/price_new/vendor_0'])),
+        ('offer price refurbished', lambda info_sequence: np.mean(info_sequence['actions/price_refurbished/vendor_0'])),
+        ('offer price rebuy', lambda info_sequence: np.mean(info_sequence['actions/price_rebuy/vendor_0'])),
+        ('sales price new',
+            lambda info_sequence: np.sum(np.array(info_sequence['actions/price_new/vendor_0']) *
+            np.array(info_sequence['customer/purchases_new/vendor_0'])) /
+            np.sum(info_sequence['customer/purchases_new/vendor_0'])),
+        ('sales price refurbished',
+            lambda info_sequence: np.sum(np.array(info_sequence['actions/price_refurbished/vendor_0']) *
+            np.array(info_sequence['customer/purchases_refurbished/vendor_0'])) /
+            np.sum(info_sequence['customer/purchases_refurbished/vendor_0'])),
+        ('sales price rebuy',
+            lambda info_sequence: np.sum(np.array(info_sequence['actions/price_rebuy/vendor_0']) *
+            np.array(info_sequence['owner/rebuys/vendor_0'])) /
+            np.sum(info_sequence['owner/rebuys/vendor_0'])),
+        ('inventory level', lambda info_sequence: np.mean(info_sequence['state/in_storage/vendor_0'])),
+        ('profit competitor', lambda info_sequence: np.mean(info_sequence['profits/all/vendor_1'])),
+        ('new sales competitor', lambda info_sequence: np.mean(info_sequence['customer/purchases_new/vendor_1'])),
+        ('refurbished sales competitor', lambda info_sequence: np.mean(info_sequence['customer/purchases_refurbished/vendor_1'])),
+        ('rebuys competitor', lambda info_sequence: np.mean(info_sequence['owner/rebuys/vendor_1'])),
+        ('offer price new competitor', lambda info_sequence: np.mean(info_sequence['actions/price_new/vendor_1'])),
+        ('offer price refurbished competitor', lambda info_sequence: np.mean(info_sequence['actions/price_refurbished/vendor_1'])),
+        ('offer price rebuy competitor', lambda info_sequence: np.mean(info_sequence['actions/price_rebuy/vendor_1'])),
+        ('sales price new competitor',
+            lambda info_sequence: np.sum(np.array(info_sequence['actions/price_new/vendor_1']) *
+            np.array(info_sequence['customer/purchases_new/vendor_1'])) /
+            np.sum(info_sequence['customer/purchases_new/vendor_1'])),
+        ('sales price refurbished competitor',
+            lambda info_sequence: np.sum(np.array(info_sequence['actions/price_refurbished/vendor_1']) *
+            np.array(info_sequence['customer/purchases_refurbished/vendor_1'])) /
+            np.sum(info_sequence['customer/purchases_refurbished/vendor_1'])),
+        ('sales price rebuy competitor',
+            lambda info_sequence: np.sum(np.array(info_sequence['actions/price_rebuy/vendor_1']) *
+            np.array(info_sequence['owner/rebuys/vendor_1'])) /
+            np.sum(info_sequence['owner/rebuys/vendor_1'])),
+        ('inventory level competitor', lambda info_sequence: np.mean(info_sequence['state/in_storage/vendor_1'])),
+        ('resources in use', lambda info_sequence: np.mean(info_sequence['state/in_circulation'])),
+        ('throw away', lambda info_sequence: np.mean(info_sequence['owner/throw_away']))
+    ]
+
+    dataframe_columns = ['market configuration'] + [parameter_name for parameter_name, _ in parameters]
+
+    dataframe = pd.DataFrame(columns=dataframe_columns)
+    for description, info_sequences in zip(descriptions, info_sequences_list):
+        row = [description]
+        for parameter_name, parameter_function in parameters:
+            row.append(parameter_function(info_sequences))
+        dataframe.loc[len(dataframe)] = row
+    return dataframe
+
+
 def run_training_session(market_class, config_market, agent_class, config_rl, training_steps, number, pipe_to_parent):
     agent = agent_class(config_market, config_rl, market_class(config_market, support_continuous_action_space=True), name=f'Train{number}')
-    watcher = agent.train_agent(training_steps)
-    pipe_to_parent.send(watcher)
+    agent.train_with_default_eval(training_steps)
+    exampleprinter = ExamplePrinter(config_market)
+    marketplace = market_class(config_market, support_continuous_action_space=True)
+    exampleprinter.setup_exampleprinter(marketplace, agent)
+    profit, info_sequences = exampleprinter.run_example(save_lineplots=True)
+    pipe_to_parent.send(info_sequences)
 
 
 def run_group(market_configs, market_descriptions, training_steps, target_function=run_training_session):
@@ -29,12 +96,12 @@ def run_group(market_configs, market_descriptions, training_steps, target_functi
         time.sleep(10)
         p.start()
     print('Now I wait for the results')
-    watchers = [output.recv() for output, _ in pipes]
+    info_sequences = [output.recv() for output, _ in pipes]
     print('Now I have the results')
     for p in processes:
         p.join()
     print('All threads joined')
-    return market_descriptions, [watcher.get_progress_values_of_property('profits/all', 0) for watcher in watchers]
+    return create_relevant_dataframe(market_descriptions, info_sequences)
 
 
 def get_different_market_configs(parameter_name, values):
@@ -48,5 +115,6 @@ def get_different_market_configs(parameter_name, values):
 
 
 if __name__ == '__main__':
-    results = run_group(*get_different_market_configs('max_storage', [20, 50, 100, 200]), training_steps=100000)
-    print(results)
+    result_df = run_group(*get_different_market_configs('max_storage', [20, 50, 100, 200]), training_steps=100000)
+    print(result_df)
+    result_df.to_excel(os.path.join(PathManager.results_path, 'storage.xlsx'), index=False)
diff --git a/recommerce/rl/stable_baselines/stable_baselines_model.py b/recommerce/rl/stable_baselines/stable_baselines_model.py
index 34408433..10c557b8 100644
--- a/recommerce/rl/stable_baselines/stable_baselines_model.py
+++ b/recommerce/rl/stable_baselines/stable_baselines_model.py
@@ -3,6 +3,8 @@
 
 import numpy as np
 from attrdict import AttrDict
+from stable_baselines3.common.callbacks import EvalCallback
+from stable_baselines3.common.monitor import Monitor
 
 from recommerce.configuration.path_manager import PathManager
 from recommerce.market.circular.circular_vendors import CircularAgent
@@ -58,6 +60,14 @@ def train_agent(self, training_steps=100001, iteration_length=500, analyze_after
 		self.model.learn(training_steps, callback=callback)
 		return callback.watcher
 
+	def train_with_default_eval(self, training_steps=100001):
+		save_path = os.path.join(PathManager.results_path, 'best_model', f'{self.name}')
+		log_path = os.path.join(PathManager.results_path, 'logs', f'{self.name}')
+		os.makedirs(log_path, exist_ok=True)
+		callback = EvalCallback(Monitor(self.marketplace, filename=log_path), best_model_save_path=save_path, log_path=log_path, render=False)
+		self.model.learn(training_steps, callback=callback)
+		return save_path
+
 	@staticmethod
 	def get_configurable_fields() -> list:
 		raise NotImplementedError
diff --git a/sqlite.db b/sqlite.db
new file mode 100644
index 0000000000000000000000000000000000000000..6536e357efc29305bd373179c6407f4b99eeb0c7
GIT binary patch
literal 20480
zcmeI%O>5gQ7{GBkO<z_>+gpy&aRoClJ?yZ{vf@Ko3tJZ0$__?J)YK4Lo-9Lq3_I<E
z?b^??Q<;=5*+!P*F5xe*^XQ3e>-kw1;ScXGbEoBvv4wK-Oq_@)60fBcLbUv`>yKp%
z{r0lsf1^L*O<_yC_<Ytm{vq0(M{)FU{Jryd^lj5%@(3V+00IagfB*srAb<b@|4U$g
zu(RJyl4$<QsnJZ=_qEeSmX}Z4!Q7PVnfCc0?GIBq?7z88<@*1J&q8}XZmiX%%UoV)
zIZQtd<-5VfRex|T-=^2G99K;&ttw)fn^;b(=JdS1*Zr78A}=TUuKqIfyV6WG&Mf-b
z`g5{mJ-0h|(SEnri@v^G)H61vQ+cWFDro&R`&D{5>E-w=pZsgmro73gv8<i4PERuB
ze#fJGUxqbJ<u6@Eg)T<ghB2n9evS9CW1WReEcKl)^654q^jYPaI+nLu&D?D)E7jE6
z-_7c}DMH|MSsObt%KFaCO#Rr2a_Tf`?RC@MCc9OYS+WXu{E9jVtEv-DC*edx009IL
zKmY**5I_I{1Q0*~fo&By427}(-`3YlT?in600IagfB*srAb<b@2rLBn{?9@{009IL
qKmY**5I_I{1Q0-A`vv&^zy0r+3K2j60R#|0009ILKmY**5cmn%CA=m8

literal 0
HcmV?d00001

diff --git a/tests/test_exampleprinter.py b/tests/test_exampleprinter.py
index 8630dbd0..3bbb55a5 100644
--- a/tests/test_exampleprinter.py
+++ b/tests/test_exampleprinter.py
@@ -56,7 +56,7 @@ def test_setup_exampleprinter():
 def test_full_episode_rule_based(marketplace, agent):
 	printer = ExamplePrinter(config_market=config_market)
 	printer.setup_exampleprinter(marketplace, agent)
-	assert printer.run_example(True) >= -5000
+	assert printer.run_example(True)[0] >= -5000
 	shutil.rmtree(PathManager.results_path)
 
 
@@ -85,7 +85,7 @@ def test_full_episode_rl_agents(marketplace, agent_class, parameters_file, confi
 		load_path=os.path.join(parameters_path, parameters_file))
 	printer = ExamplePrinter(config_market=config_market)
 	printer.setup_exampleprinter(marketplace, agent)
-	assert printer.run_example(True) >= -5000
+	assert printer.run_example(True)[0] >= -5000
 	shutil.rmtree(PathManager.results_path)
 
 

From 898194c54605c2a6fb66dc96ebd7f91362cf8156 Mon Sep 17 00:00:00 2001
From: Jan Groeneveld <jan.groeneveld@student.hpi.de>
Date: Wed, 15 Mar 2023 18:20:46 +0100
Subject: [PATCH 03/20] more experiments

---
 recommerce/rl/ablation_study.py | 23 ++++++++++++++++++++---
 tests/test_exampleprinter.py    |  2 +-
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/recommerce/rl/ablation_study.py b/recommerce/rl/ablation_study.py
index 5dfada56..da603035 100644
--- a/recommerce/rl/ablation_study.py
+++ b/recommerce/rl/ablation_study.py
@@ -115,6 +115,23 @@ def get_different_market_configs(parameter_name, values):
 
 
 if __name__ == '__main__':
-    result_df = run_group(*get_different_market_configs('max_storage', [20, 50, 100, 200]), training_steps=100000)
-    print(result_df)
-    result_df.to_excel(os.path.join(PathManager.results_path, 'storage.xlsx'), index=False)
+    storage_df = run_group(*get_different_market_configs('max_storage', [20, 50, 100, 200]), training_steps=100000)
+    print(storage_df)
+    storage_df.to_excel(os.path.join(PathManager.results_path, 'storage.xlsx'), index=False)
+    production_price_df = run_group(*get_different_market_configs('production_price', [2, 3, 4]), training_steps=100000)
+    print(production_price_df)
+    production_price_df.to_excel(os.path.join(PathManager.results_path, 'production_price.xlsx'), index=False)
+    number_of_customers_df = run_group(*get_different_market_configs('number_of_customers', [10, 20, 30]), training_steps=100000)
+    print(number_of_customers_df)
+    number_of_customers_df.to_excel(os.path.join(PathManager.results_path, 'number_of_customers.xlsx'), index=False)
+    storage_cost_df = run_group(*get_different_market_configs('storage_cost', [0.01, 0.05, 0.1, 0.2]), training_steps=100000)
+    print(storage_cost_df)
+    storage_cost_df.to_excel(os.path.join(PathManager.results_path, 'storage_cost.xlsx'), index=False)
+
+    # merge all dataframes
+    all_dataframes = [storage_df, production_price_df, number_of_customers_df, storage_cost_df]
+    all_dataframes = [df.set_index('market configuration') for df in all_dataframes]
+    merged_df = pd.concat(all_dataframes, axis=1)
+
+    # save merged dataframe to excel
+    merged_df.to_excel(os.path.join(PathManager.results_path, 'merged.xlsx'))
diff --git a/tests/test_exampleprinter.py b/tests/test_exampleprinter.py
index 3bbb55a5..693bde01 100644
--- a/tests/test_exampleprinter.py
+++ b/tests/test_exampleprinter.py
@@ -91,5 +91,5 @@ def test_full_episode_rl_agents(marketplace, agent_class, parameters_file, confi
 
 @pytest.mark.slow
 def test_exampleprinter_with_tensorboard():
-	assert ExamplePrinter(config_market=config_market).run_example(True) >= -5000
+	assert ExamplePrinter(config_market=config_market).run_example(True)[0] >= -5000
 	shutil.rmtree(PathManager.results_path)

From 7a35f4b17b1e63fe0db5d04850677028cd806811 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Thu, 16 Mar 2023 23:04:30 +0100
Subject: [PATCH 04/20] accomplished ablation study

---
 .../configuration_files/market_config.json    |  8 +++++-
 .../market_config_mixed_reward_function.json  |  8 +++++-
 .../market_config_only_partial_view.json      |  8 +++++-
 ...arket_config_opposite_state_invisible.json |  8 +++++-
 .../market/circular/circular_customers.py     |  6 ++--
 .../market/circular/circular_sim_market.py    | 12 ++++++--
 .../market/circular/circular_vendors.py       |  4 +--
 recommerce/market/customer.py                 |  5 ++--
 recommerce/market/linear/linear_customers.py  |  2 +-
 recommerce/market/sim_market.py               |  2 +-
 recommerce/rl/ablation_study.py               | 28 ++++++++++++++++---
 setup.cfg                                     |  1 +
 tests/test_customers.py                       | 19 +++++++------
 .../configuration_files/market_config.json    |  8 +++++-
 14 files changed, 89 insertions(+), 30 deletions(-)

diff --git a/recommerce/default_data/configuration_files/market_config.json b/recommerce/default_data/configuration_files/market_config.json
index 3287a28f..56d0a1ed 100644
--- a/recommerce/default_data/configuration_files/market_config.json
+++ b/recommerce/default_data/configuration_files/market_config.json
@@ -9,5 +9,11 @@
 	"storage_cost_per_product": 0.1,
 	"opposite_own_state_visibility": true,
 	"common_state_visibility": true,
-	"reward_mixed_profit_and_difference": false
+	"reward_mixed_profit_and_difference": false,
+    "compared_value_old": 0.55,
+    "upper_tolerance_old": 5.0,
+    "upper_tolerance_new": 8.0,
+    "share_interested_owners": 0.05,
+    "competitor_lowest_storage_level": 6.5,
+    "competitor_ok_storage_level": 12.5
 }
diff --git a/recommerce/default_data/configuration_files/market_config_mixed_reward_function.json b/recommerce/default_data/configuration_files/market_config_mixed_reward_function.json
index bb239e98..b846ac8b 100644
--- a/recommerce/default_data/configuration_files/market_config_mixed_reward_function.json
+++ b/recommerce/default_data/configuration_files/market_config_mixed_reward_function.json
@@ -9,5 +9,11 @@
 	"storage_cost_per_product": 0.1,
 	"opposite_own_state_visibility": true,
 	"common_state_visibility": true,
-	"reward_mixed_profit_and_difference": true
+	"reward_mixed_profit_and_difference": true,
+    "compared_value_old": 0.55,
+    "upper_tolerance_old": 5,
+    "upper_tolerance_new": 8,
+    "share_interested_owners": 0.05,
+    "competitor_lowest_storage_level": 6.5,
+    "competitor_ok_storage_level": 12.5
 }
diff --git a/recommerce/default_data/configuration_files/market_config_only_partial_view.json b/recommerce/default_data/configuration_files/market_config_only_partial_view.json
index ca864a02..65a768e8 100644
--- a/recommerce/default_data/configuration_files/market_config_only_partial_view.json
+++ b/recommerce/default_data/configuration_files/market_config_only_partial_view.json
@@ -9,5 +9,11 @@
 	"storage_cost_per_product": 0.1,
 	"opposite_own_state_visibility": false,
 	"common_state_visibility": false,
-	"reward_mixed_profit_and_difference": false
+	"reward_mixed_profit_and_difference": false,
+    "compared_value_old": 0.55,
+    "upper_tolerance_old": 5,
+    "upper_tolerance_new": 8,
+    "share_interested_owners": 0.05,
+    "competitor_lowest_storage_level": 6.5,
+    "competitor_ok_storage_level": 12.5
 }
diff --git a/recommerce/default_data/configuration_files/market_config_opposite_state_invisible.json b/recommerce/default_data/configuration_files/market_config_opposite_state_invisible.json
index 10ecbb13..b95f3b50 100644
--- a/recommerce/default_data/configuration_files/market_config_opposite_state_invisible.json
+++ b/recommerce/default_data/configuration_files/market_config_opposite_state_invisible.json
@@ -9,5 +9,11 @@
 	"storage_cost_per_product": 0.1,
 	"opposite_own_state_visibility": false,
 	"common_state_visibility": true,
-	"reward_mixed_profit_and_difference": false
+	"reward_mixed_profit_and_difference": false,
+    "compared_value_old": 0.55,
+    "upper_tolerance_old": 5,
+    "upper_tolerance_new": 8,
+    "share_interested_owners": 0.05,
+    "competitor_lowest_storage_level": 6.5,
+    "competitor_ok_storage_level": 12.5
 }
diff --git a/recommerce/market/circular/circular_customers.py b/recommerce/market/circular/circular_customers.py
index be2858c0..8000e565 100644
--- a/recommerce/market/circular/circular_customers.py
+++ b/recommerce/market/circular/circular_customers.py
@@ -5,7 +5,7 @@
 
 
 class CustomerCircular(Customer):
-	def generate_purchase_probabilities_from_offer(self, common_state, vendor_specific_state, vendor_actions) -> np.array:
+	def generate_purchase_probabilities_from_offer(self, market_config, common_state, vendor_specific_state, vendor_actions) -> np.array:
 		"""
 		This method calculates the purchase probability for each vendor in a linear setup.
 		It is assumed that all vendors do have the same quality and same reputation.
@@ -27,8 +27,8 @@ def generate_purchase_probabilities_from_offer(self, common_state, vendor_specif
 			price_new = vendor_actions[vendor_idx][1] + 1
 			assert price_refurbished >= 1 and price_new >= 1, 'price_refurbished and price_new need to be >= 1'
 
-			ratio_old = 5.5 / price_refurbished - np.exp(price_refurbished - 5)
-			ratio_new = 10 / price_new - np.exp(price_new - 8)
+			ratio_old = market_config.compared_value_old * 10 / price_refurbished - np.exp(price_refurbished - market_config.upper_tolerance_old)
+			ratio_new = 10 / price_new - np.exp(price_new - market_config.upper_tolerance_new)
 			preferences += [ratio_old, ratio_new]
 
 		return ut.softmax(np.array(preferences))
diff --git a/recommerce/market/circular/circular_sim_market.py b/recommerce/market/circular/circular_sim_market.py
index c2cf376c..99c041bf 100644
--- a/recommerce/market/circular/circular_sim_market.py
+++ b/recommerce/market/circular/circular_sim_market.py
@@ -6,7 +6,7 @@
 import recommerce.configuration.utils as ut
 import recommerce.market.circular.circular_vendors as circular_vendors
 import recommerce.market.owner as owner
-from recommerce.configuration.common_rules import greater_zero_even_rule, greater_zero_rule, non_negative_rule
+from recommerce.configuration.common_rules import between_zero_one_rule, greater_zero_even_rule, greater_zero_rule, non_negative_rule
 from recommerce.market.circular.circular_customers import CustomerCircular
 from recommerce.market.customer import Customer
 from recommerce.market.owner import Owner
@@ -32,7 +32,13 @@ def get_configurable_fields() -> list:
 			('storage_cost_per_product', (int, float), non_negative_rule),
 			('opposite_own_state_visibility', bool, None),
 			('common_state_visibility', bool, None),
-			('reward_mixed_profit_and_difference', bool, None)
+			('reward_mixed_profit_and_difference', bool, None),
+			('compared_value_old', float, greater_zero_rule),
+			('upper_tolerance_old', float, greater_zero_rule),
+			('upper_tolerance_new', float, greater_zero_rule),
+			('share_interested_owners', float, between_zero_one_rule),
+			('competitor_lowest_storage_level', float, greater_zero_rule),
+			('competitor_ok_storage_level', float, greater_zero_rule)
 		]
 
 	def _setup_action_observation_space(self, support_continuous_action_space: bool) -> None:
@@ -123,7 +129,7 @@ def _simulate_owners(self, profits) -> None:
 		assert len(return_probabilities) == 2 + self._number_of_vendors, \
 			'the length of return_probabilities must be the number of vendors plus 2'
 
-		number_of_owners = int(0.05 * self.in_circulation / self._number_of_vendors)
+		number_of_owners = int(self.config.share_interested_owners * self.in_circulation / self._number_of_vendors)
 		owner_decisions = np.random.multinomial(number_of_owners, return_probabilities).tolist()
 
 		# owner decisions can be as follows:
diff --git a/recommerce/market/circular/circular_vendors.py b/recommerce/market/circular/circular_vendors.py
index 92fbed50..d651f7e4 100644
--- a/recommerce/market/circular/circular_vendors.py
+++ b/recommerce/market/circular/circular_vendors.py
@@ -160,11 +160,11 @@ def policy(self, observation, *_) -> tuple:
 
 		price_new = max(min(competitors_new_prices) - 1, self.config_market.production_price + 1)
 		# competitor's storage is ignored
-		if own_storage < self.config_market.max_storage / 15:
+		if own_storage < self.config_market.competitor_lowest_storage_level:
 			# fill up the storage immediately
 			price_refurbished = min(competitors_refurbished_prices) + 1
 			rebuy_price = max(min(competitors_rebuy_prices) + 1, 2)
-		elif own_storage < self.config_market.max_storage / 8:
+		elif own_storage < self.config_market.competitor_ok_storage_level:
 			# storage content is ok
 			rebuy_price = max(min(competitors_rebuy_prices) - 1, 0.25)
 			price_refurbished = max(min(competitors_refurbished_prices) - 1, rebuy_price + 1)
diff --git a/recommerce/market/customer.py b/recommerce/market/customer.py
index ebfdc615..709bf22d 100644
--- a/recommerce/market/customer.py
+++ b/recommerce/market/customer.py
@@ -5,7 +5,8 @@
 
 class Customer(ABC):
 	@abstractmethod
-	def generate_purchase_probabilities_from_offer(self, common_state, vendor_specific_state, vendor_actions) -> np.array:  # pragma: no cover
+	def generate_purchase_probabilities_from_offer(self, market_config, common_state,
+						vendor_specific_state, vendor_actions) -> np.array:  # pragma: no cover
 		"""
 		This method receives the state of the market and uses it as a list of offers.
 		It returns the purchase probability for all vendors.
@@ -19,4 +20,4 @@ def generate_purchase_probabilities_from_offer(self, common_state, vendor_specif
 			In the subsequent fields, there are the probabilites for buying the specific offers from the vendor.
 			Look subclass implementation for more details.
 		"""
-		raise NotImplementedError('This method is abstract. Use a subclass')
+		raise NotImplementedError('This method is abstract. Use a subclass')
diff --git a/recommerce/market/linear/linear_customers.py b/recommerce/market/linear/linear_customers.py
index 8a818923..5b64f04f 100644
--- a/recommerce/market/linear/linear_customers.py
+++ b/recommerce/market/linear/linear_customers.py
@@ -5,7 +5,7 @@
 
 
 class CustomerLinear(Customer):
-	def generate_purchase_probabilities_from_offer(self, common_state, vendor_specific_state, vendor_actions) -> np.array:
+	def generate_purchase_probabilities_from_offer(self, market_config, common_state, vendor_specific_state, vendor_actions) -> np.array:
 		"""
 		This method calculates the purchase probability for each vendor in a linear setup.
 		Quality values are used to calculate a ratio.
diff --git a/recommerce/market/sim_market.py b/recommerce/market/sim_market.py
index 7e73a48c..708403ab 100644
--- a/recommerce/market/sim_market.py
+++ b/recommerce/market/sim_market.py
@@ -117,7 +117,7 @@ def _simulate_customers(self, profits, number_of_customers) -> None:
 			number_of_customers (int): the number of customers eager to buy each step.
 		"""
 		probability_distribution = self._customer.generate_purchase_probabilities_from_offer(
-			self._get_common_state_array(), self.vendor_specific_state, self.vendor_actions)
+			self.config, self._get_common_state_array(), self.vendor_specific_state, self.vendor_actions)
 		assert isinstance(probability_distribution, np.ndarray), 'generate_purchase_probabilities_from_offer must return an np.ndarray'
 		assert self._is_probability_distribution_fitting_exactly(probability_distribution)
 
diff --git a/recommerce/rl/ablation_study.py b/recommerce/rl/ablation_study.py
index da603035..a001e590 100644
--- a/recommerce/rl/ablation_study.py
+++ b/recommerce/rl/ablation_study.py
@@ -115,18 +115,38 @@ def get_different_market_configs(parameter_name, values):
 
 
 if __name__ == '__main__':
-    storage_df = run_group(*get_different_market_configs('max_storage', [20, 50, 100, 200]), training_steps=100000)
+    storage_df = run_group(*get_different_market_configs('max_storage', [20, 50, 100, 200]), training_steps=11000)
     print(storage_df)
     storage_df.to_excel(os.path.join(PathManager.results_path, 'storage.xlsx'), index=False)
-    production_price_df = run_group(*get_different_market_configs('production_price', [2, 3, 4]), training_steps=100000)
+    production_price_df = run_group(*get_different_market_configs('production_price', [2, 3, 4]), training_steps=11000)
     print(production_price_df)
     production_price_df.to_excel(os.path.join(PathManager.results_path, 'production_price.xlsx'), index=False)
-    number_of_customers_df = run_group(*get_different_market_configs('number_of_customers', [10, 20, 30]), training_steps=100000)
+    number_of_customers_df = run_group(*get_different_market_configs('number_of_customers', [10, 20, 30]), training_steps=11000)
     print(number_of_customers_df)
     number_of_customers_df.to_excel(os.path.join(PathManager.results_path, 'number_of_customers.xlsx'), index=False)
-    storage_cost_df = run_group(*get_different_market_configs('storage_cost', [0.01, 0.05, 0.1, 0.2]), training_steps=100000)
+    storage_cost_df = run_group(*get_different_market_configs('storage_cost', [0.01, 0.05, 0.1, 0.2]), training_steps=11000)
     print(storage_cost_df)
     storage_cost_df.to_excel(os.path.join(PathManager.results_path, 'storage_cost.xlsx'), index=False)
+    compared_value_old_df = run_group(*get_different_market_configs('compared_value_old', [0.4, 0.55, 0.6]), training_steps=11000)
+    print(compared_value_old_df)
+    compared_value_old_df.to_excel(os.path.join(PathManager.results_path, 'compared_value_old.xlsx'), index=False)
+    upper_tolerance_old_df = run_group(*get_different_market_configs('upper_tolerance_old', [4.0, 5.0, 6.0]), training_steps=11000)
+    print(upper_tolerance_old_df)
+    upper_tolerance_old_df.to_excel(os.path.join(PathManager.results_path, 'upper_tolerance_old.xlsx'), index=False)
+    upper_tolerance_new_df = run_group(*get_different_market_configs('upper_tolerance_new', [7.0, 8.0, 9.0]), training_steps=11000)
+    print(upper_tolerance_new_df)
+    upper_tolerance_new_df.to_excel(os.path.join(PathManager.results_path, 'upper_tolerance_new.xlsx'), index=False)
+    share_interested_owners_df = run_group(*get_different_market_configs('share_interested_owners', [0.025, 0.05, 0.075]),
+                                           training_steps=11000)
+    print(share_interested_owners_df)
+    share_interested_owners_df.to_excel(os.path.join(PathManager.results_path, 'share_interested_owners.xlsx'), index=False)
+    competitor_lowest_storage_level_df = run_group(*get_different_market_configs('competitor_lowest_storage_level', [4.5, 6.5, 8.5]),
+                                                   training_steps=11000)
+    print(competitor_lowest_storage_level_df)
+    competitor_lowest_storage_level_df.to_excel(os.path.join(PathManager.results_path, 'competitor_lowest_storage_level.xlsx'),
+                                                index=False)
+    competitor_ok_storage_level_df = run_group(*get_different_market_configs('competitor_ok_storage_level', [9.5, 12.5, 15.5]),
+                                               training_steps=11000)
 
     # merge all dataframes
     all_dataframes = [storage_df, production_price_df, number_of_customers_df, storage_cost_df]
diff --git a/setup.cfg b/setup.cfg
index 167f033c..02038ed7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -29,6 +29,7 @@ install_requires =
 	names>=0.3.0
 	scipy>=1.8.0
 	attrdict>=2.0.1
+    openpyxl>=3.1.2
 python_requires = >=3.8
 
 [options.extras_require]
diff --git a/tests/test_customers.py b/tests/test_customers.py
index 660b8d68..956d3ee5 100644
--- a/tests/test_customers.py
+++ b/tests/test_customers.py
@@ -16,7 +16,8 @@
 # Test the Customer parent class, i.e. make sure it cannot be used
 def test_customer_parent_class():
 	with pytest.raises(NotImplementedError) as assertion_message:
-		customer.Customer.generate_purchase_probabilities_from_offer(CustomerLinear, *random_offer(linear_market.LinearEconomyDuopoly))
+		customer.Customer.generate_purchase_probabilities_from_offer(CustomerLinear, config_market,
+			*random_offer(linear_market.LinearEconomyDuopoly))
 	assert 'This method is abstract. Use a subclass' in str(assertion_message.value)
 
 
@@ -42,7 +43,7 @@ def test_customer_parent_class():
 	generate_purchase_probabilities_from_offer_testcases)
 def test_generate_purchase_probabilities_from_offer(customer, common_state, vendor_specific_state, vendor_actions, expected_message):
 	with pytest.raises(AssertionError) as assertion_message:
-		customer.generate_purchase_probabilities_from_offer(customer, common_state, vendor_specific_state, vendor_actions)
+		customer.generate_purchase_probabilities_from_offer(customer, config_market, common_state, vendor_specific_state, vendor_actions)
 	assert expected_message in str(assertion_message.value)
 
 
@@ -58,7 +59,7 @@ def test_generate_purchase_probabilities_from_offer(customer, common_state, vend
 @pytest.mark.parametrize('customer, market', customer_action_range_testcases)
 def test_customer_action_range(customer, market):
 	offers = random_offer(market)
-	probability_distribution = customer.generate_purchase_probabilities_from_offer(customer, *offers)
+	probability_distribution = customer.generate_purchase_probabilities_from_offer(customer, config_market, *offers)
 	assert len(probability_distribution) == market(config=config_market)._get_number_of_vendors() * \
 		(1 if issubclass(market, linear_market.LinearEconomy) else 2) + 1
 
@@ -66,14 +67,14 @@ def test_customer_action_range(customer, market):
 def test_linear_higher_price_lower_purchase_probability():
 	common_state, vendor_specific_state, vendor_actions = np.array([]), [[12], [12]], [3, 5]
 	probability_distribution = CustomerLinear.generate_purchase_probabilities_from_offer(
-		CustomerLinear, common_state, vendor_specific_state, vendor_actions)
+		CustomerLinear, config_market, common_state, vendor_specific_state, vendor_actions)
 	assert probability_distribution[1] > probability_distribution[2]
 
 
 def test_linear_higher_quality_higher_purchase_probability():
 	common_state, vendor_specific_state, vendor_actions = np.array([]), [[13], [12]], [3, 3]
 	probability_distribution = CustomerLinear.generate_purchase_probabilities_from_offer(
-		CustomerLinear, common_state, vendor_specific_state, vendor_actions)
+		CustomerLinear, config_market, common_state, vendor_specific_state, vendor_actions)
 	assert probability_distribution[1] > probability_distribution[2]
 
 
@@ -81,17 +82,17 @@ def test_equal_ratio_equal_purchase_probability():
 	# In the following line: [3, 1] means prices [4, 2]
 	common_state, vendor_specific_state, vendor_actions = np.array([]), [[16], [8]], [3, 1]
 	probability_distribution = CustomerLinear.generate_purchase_probabilities_from_offer(
-		CustomerLinear, common_state, vendor_specific_state, vendor_actions)
+		CustomerLinear, config_market, common_state, vendor_specific_state, vendor_actions)
 	assert probability_distribution[1] == probability_distribution[2]
 
 
 def test_linear_lower_overall_price_lower_nothing_probability():
 	common_state1, vendor_specific_state1, vendor_actions1 = np.array([]), [[15], [15]], [3, 3]
 	probability_distribution1 = CustomerLinear.generate_purchase_probabilities_from_offer(
-		CustomerLinear, common_state1, vendor_specific_state1, vendor_actions1)
+		CustomerLinear, config_market, common_state1, vendor_specific_state1, vendor_actions1)
 	common_state2, vendor_specific_state2, vendor_actions2 = np.array([]), [[15], [15]], [4, 4]
 	probability_distribution2 = CustomerLinear.generate_purchase_probabilities_from_offer(
-		CustomerLinear, common_state2, vendor_specific_state2, vendor_actions2)
+		CustomerLinear, config_market, common_state2, vendor_specific_state2, vendor_actions2)
 	print(probability_distribution1)
 	print(probability_distribution2)
 	assert probability_distribution1[0] < probability_distribution2[0]
@@ -102,7 +103,7 @@ def test_linear_lower_overall_price_lower_nothing_probability():
 def test_circular_higher_price_lower_purchase_probability():
 	common_state, vendor_specific_state, vendor_actions = np.array([]), [[17], [23]], [[3, 6], [4, 5]]
 	probability_distribution = CustomerCircular.generate_purchase_probabilities_from_offer(
-		CustomerCircular, common_state, vendor_specific_state, vendor_actions)
+		CustomerCircular, config_market, common_state, vendor_specific_state, vendor_actions)
 	assert probability_distribution[1] > probability_distribution[3]
 	assert probability_distribution[2] < probability_distribution[4]
 
diff --git a/tests/test_data/configuration_files/market_config.json b/tests/test_data/configuration_files/market_config.json
index 25031a42..cede30c8 100644
--- a/tests/test_data/configuration_files/market_config.json
+++ b/tests/test_data/configuration_files/market_config.json
@@ -8,5 +8,11 @@
 	"storage_cost_per_product": 0.1,
 	"opposite_own_state_visibility": true,
 	"common_state_visibility": true,
-	"reward_mixed_profit_and_difference": false
+	"reward_mixed_profit_and_difference": false,
+    "compared_value_old": 0.55,
+    "upper_tolerance_old": 5.0,
+    "upper_tolerance_new": 8.0,
+    "share_interested_owners": 0.05,
+    "competitor_lowest_storage_level": 6.5,
+    "competitor_ok_storage_level": 12.5
 }

From a8f63a74c18b4e48205471b1958c8cb338e28e96 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Thu, 16 Mar 2023 23:08:21 +0100
Subject: [PATCH 05/20] disabled webserver tests

---
 .github/workflows/Pre-commmit and Tests.yml | 52 ++++++++++-----------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/Pre-commmit and Tests.yml b/.github/workflows/Pre-commmit and Tests.yml
index 67faaa14..3db7d252 100644
--- a/.github/workflows/Pre-commmit and Tests.yml	
+++ b/.github/workflows/Pre-commmit and Tests.yml	
@@ -20,32 +20,32 @@ jobs:
       - name: Pre-commit
         uses: pre-commit/action@v2.0.3
 
-  webserver-tests:
-    needs: pre-commit
-    runs-on: [self-hosted, ubuntu-20.04]
-    name: Webserver tests
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-      - name: Set up Python 3.8
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.8
-      - name: Install Recommerce package
-        shell: bash -l {0}
-        run: |
-          pip install -e .[cpu]
-      - name: Setup Recommerce
-        shell: bash -l {0}
-        run: |
-          recommerce --get-defaults-unpack
-      - name: Run Webserver tests
-        env:
-          SECRET_KEY: 'fake_secret_key'
-          API_TOKEN: 'fake_api_token'
-        shell: bash -l {0}
-        run: |
-          (cd ./webserver && python ./manage.py test -v 2)
+  # webserver-tests:
+  #   needs: pre-commit
+  #   runs-on: [self-hosted, ubuntu-20.04]
+  #   name: Webserver tests
+  #   steps:
+  #     - name: Checkout
+  #       uses: actions/checkout@v2
+  #     - name: Set up Python 3.8
+  #       uses: actions/setup-python@v2
+  #       with:
+  #         python-version: 3.8
+  #     - name: Install Recommerce package
+  #       shell: bash -l {0}
+  #       run: |
+  #         pip install -e .[cpu]
+  #     - name: Setup Recommerce
+  #       shell: bash -l {0}
+  #       run: |
+  #         recommerce --get-defaults-unpack
+  #     - name: Run Webserver tests
+  #       env:
+  #         SECRET_KEY: 'fake_secret_key'
+  #         API_TOKEN: 'fake_api_token'
+  #       shell: bash -l {0}
+  #       run: |
+  #         (cd ./webserver && python ./manage.py test -v 2)
 
   recommerce-tests:
     needs: webserver-tests

From c1a54bdf8a5cfa51e4ecfe72b17b5c891f336c14 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Thu, 16 Mar 2023 23:18:38 +0100
Subject: [PATCH 06/20] disabled webserver 2

---
 .github/workflows/Pre-commmit and Tests.yml |  2 +-
 recommerce/rl/ablation_study.py             | 24 +++++++++++----------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/Pre-commmit and Tests.yml b/.github/workflows/Pre-commmit and Tests.yml
index 3db7d252..307224fe 100644
--- a/.github/workflows/Pre-commmit and Tests.yml	
+++ b/.github/workflows/Pre-commmit and Tests.yml	
@@ -48,7 +48,7 @@ jobs:
   #         (cd ./webserver && python ./manage.py test -v 2)
 
   recommerce-tests:
-    needs: webserver-tests
+    needs: pre-commit
     runs-on: [self-hosted, ubuntu-20.04]
     name: Recommerce tests
     timeout-minutes: 30
diff --git a/recommerce/rl/ablation_study.py b/recommerce/rl/ablation_study.py
index a001e590..32a7927d 100644
--- a/recommerce/rl/ablation_study.py
+++ b/recommerce/rl/ablation_study.py
@@ -115,41 +115,43 @@ def get_different_market_configs(parameter_name, values):
 
 
 if __name__ == '__main__':
-    storage_df = run_group(*get_different_market_configs('max_storage', [20, 50, 100, 200]), training_steps=11000)
+    storage_df = run_group(*get_different_market_configs('max_storage', [20, 50, 100, 200]), training_steps=100000)
     print(storage_df)
     storage_df.to_excel(os.path.join(PathManager.results_path, 'storage.xlsx'), index=False)
-    production_price_df = run_group(*get_different_market_configs('production_price', [2, 3, 4]), training_steps=11000)
+    production_price_df = run_group(*get_different_market_configs('production_price', [2, 3, 4]), training_steps=100000)
     print(production_price_df)
     production_price_df.to_excel(os.path.join(PathManager.results_path, 'production_price.xlsx'), index=False)
-    number_of_customers_df = run_group(*get_different_market_configs('number_of_customers', [10, 20, 30]), training_steps=11000)
+    number_of_customers_df = run_group(*get_different_market_configs('number_of_customers', [10, 20, 30]), training_steps=100000)
     print(number_of_customers_df)
     number_of_customers_df.to_excel(os.path.join(PathManager.results_path, 'number_of_customers.xlsx'), index=False)
-    storage_cost_df = run_group(*get_different_market_configs('storage_cost', [0.01, 0.05, 0.1, 0.2]), training_steps=11000)
+    storage_cost_df = run_group(*get_different_market_configs('storage_cost', [0.01, 0.05, 0.1, 0.2]), training_steps=100000)
     print(storage_cost_df)
     storage_cost_df.to_excel(os.path.join(PathManager.results_path, 'storage_cost.xlsx'), index=False)
-    compared_value_old_df = run_group(*get_different_market_configs('compared_value_old', [0.4, 0.55, 0.6]), training_steps=11000)
+    compared_value_old_df = run_group(*get_different_market_configs('compared_value_old', [0.4, 0.55, 0.6]), training_steps=100000)
     print(compared_value_old_df)
     compared_value_old_df.to_excel(os.path.join(PathManager.results_path, 'compared_value_old.xlsx'), index=False)
-    upper_tolerance_old_df = run_group(*get_different_market_configs('upper_tolerance_old', [4.0, 5.0, 6.0]), training_steps=11000)
+    upper_tolerance_old_df = run_group(*get_different_market_configs('upper_tolerance_old', [4.0, 5.0, 6.0]), training_steps=100000)
     print(upper_tolerance_old_df)
     upper_tolerance_old_df.to_excel(os.path.join(PathManager.results_path, 'upper_tolerance_old.xlsx'), index=False)
-    upper_tolerance_new_df = run_group(*get_different_market_configs('upper_tolerance_new', [7.0, 8.0, 9.0]), training_steps=11000)
+    upper_tolerance_new_df = run_group(*get_different_market_configs('upper_tolerance_new', [7.0, 8.0, 9.0]), training_steps=100000)
     print(upper_tolerance_new_df)
     upper_tolerance_new_df.to_excel(os.path.join(PathManager.results_path, 'upper_tolerance_new.xlsx'), index=False)
     share_interested_owners_df = run_group(*get_different_market_configs('share_interested_owners', [0.025, 0.05, 0.075]),
-                                           training_steps=11000)
+                                           training_steps=100000)
     print(share_interested_owners_df)
     share_interested_owners_df.to_excel(os.path.join(PathManager.results_path, 'share_interested_owners.xlsx'), index=False)
     competitor_lowest_storage_level_df = run_group(*get_different_market_configs('competitor_lowest_storage_level', [4.5, 6.5, 8.5]),
-                                                   training_steps=11000)
+                                                   training_steps=100000)
     print(competitor_lowest_storage_level_df)
     competitor_lowest_storage_level_df.to_excel(os.path.join(PathManager.results_path, 'competitor_lowest_storage_level.xlsx'),
                                                 index=False)
     competitor_ok_storage_level_df = run_group(*get_different_market_configs('competitor_ok_storage_level', [9.5, 12.5, 15.5]),
-                                               training_steps=11000)
+                                               training_steps=100000)
 
     # merge all dataframes
-    all_dataframes = [storage_df, production_price_df, number_of_customers_df, storage_cost_df]
+    all_dataframes = [storage_df, production_price_df, number_of_customers_df, storage_cost_df, compared_value_old_df,
+                    upper_tolerance_old_df, upper_tolerance_new_df, share_interested_owners_df,
+                    competitor_lowest_storage_level_df, competitor_ok_storage_level_df]
     all_dataframes = [df.set_index('market configuration') for df in all_dataframes]
     merged_df = pd.concat(all_dataframes, axis=1)
 

From 5b605bf8a6665d0f5c4410c3c71d8b54cc5a1569 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Fri, 17 Mar 2023 21:28:53 +0100
Subject: [PATCH 07/20] better environment fit

---
 .../market/circular/circular_customers.py     | 43 +++++++++
 .../market/circular/circular_sim_market.py    |  7 +-
 .../market/circular/circular_vendors.py       | 87 +++++++++++++++++++
 recommerce/market/owner.py                    | 40 +++++++++
 recommerce/market/samples_generation.py       | 25 ++++++
 recommerce/market/sim_market.py               | 42 ++++++++-
 recommerce/monitoring/exampleprinter.py       | 26 +++---
 recommerce/rl/ablation_study.py               |  2 +-
 .../stable_baselines_model.py                 | 15 +++-
 recommerce/rl/training_scenario.py            |  6 +-
 setup.cfg                                     |  1 +
 11 files changed, 274 insertions(+), 20 deletions(-)
 create mode 100644 recommerce/market/samples_generation.py

diff --git a/recommerce/market/circular/circular_customers.py b/recommerce/market/circular/circular_customers.py
index 8000e565..c2df5e41 100644
--- a/recommerce/market/circular/circular_customers.py
+++ b/recommerce/market/circular/circular_customers.py
@@ -1,6 +1,11 @@
+import os
+
 import numpy as np
+import pandas as pd
+from sklearn.linear_model import LinearRegression
 
 import recommerce.configuration.utils as ut
+from recommerce.configuration.path_manager import PathManager
 from recommerce.market.customer import Customer
 
 
@@ -32,3 +37,41 @@ def generate_purchase_probabilities_from_offer(self, market_config, common_state
 			preferences += [ratio_old, ratio_new]
 
 		return ut.softmax(np.array(preferences))
+
+
+class LinearRegressionCustomer(Customer):
+	def __init__(self) -> None:
+		customers_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'customers_dataframe.xlsx'))
+		customers_dataframe = customers_dataframe.iloc[-50000:, :]
+		print('Dataset read')
+		X = customers_dataframe.iloc[:, 0:6].values
+		X_dash_list = []
+		print('Now I start to construct the binary features')
+		for price_threshhold in range(10):
+			# iterate through the columns
+			for i_feature, column in enumerate(X.T):
+				column_values = np.where(column > price_threshhold, 1, 0)
+				# append the new column to X
+				X_dash_list.append(column_values.reshape(-1, 1))
+		X_dash = np.concatenate(X_dash_list, axis=1)
+		X = np.concatenate((X, X_dash), axis=1)
+		Y = customers_dataframe.iloc[:, 6:11].values
+		self.regressor = LinearRegression()
+		self.regressor.fit(X, Y)
+		print(f'LinearRegressionCustomer: R^2 = {self.regressor.score(X, Y)}')
+
+		prediction = self.regressor.predict(X)
+		print(f'LinearRegressionCustomer: prediction = {prediction}')
+		customers_dataframe['buy nothing predicted'] = prediction[:, 0]
+		customers_dataframe['buy new agent predicted'] = prediction[:, 1]
+		customers_dataframe['buy refurbished agent predicted'] = prediction[:, 2]
+		customers_dataframe['buy new competitor predicted'] = prediction[:, 3]
+		customers_dataframe['buy refurbished competitor predicted'] = prediction[:, 4]
+		customers_dataframe.to_excel(os.path.join(PathManager.data_path, 'customers_dataframe_predicted.xlsx'), index=False)
+
+	def generate_purchase_probabilities_from_offer(self, market_config, common_state, vendor_specific_state, vendor_actions) -> np.array:
+		return np.array([0, 0, 0, 0])
+
+
+if __name__ == '__main__':
+	LinearRegressionCustomer()
diff --git a/recommerce/market/circular/circular_sim_market.py b/recommerce/market/circular/circular_sim_market.py
index 99c041bf..d11873a8 100644
--- a/recommerce/market/circular/circular_sim_market.py
+++ b/recommerce/market/circular/circular_sim_market.py
@@ -123,8 +123,9 @@ def _simulate_owners(self, profits) -> None:
 			profits (np.array(int)): The profits of the vendor.
 		"""
 		assert self._owner is not None, 'an owner must be set'
+		common_state_array = self._get_common_state_array()
 		return_probabilities = self._owner.generate_return_probabilities_from_offer(
-			self._get_common_state_array(), self.vendor_specific_state, self.vendor_actions)
+			common_state_array, self.vendor_specific_state, self.vendor_actions)
 		assert isinstance(return_probabilities, np.ndarray), 'return_probabilities must be an np.ndarray'
 		assert len(return_probabilities) == 2 + self._number_of_vendors, \
 			'the length of return_probabilities must be the number of vendors plus 2'
@@ -132,6 +133,10 @@ def _simulate_owners(self, profits) -> None:
 		number_of_owners = int(self.config.share_interested_owners * self.in_circulation / self._number_of_vendors)
 		owner_decisions = np.random.multinomial(number_of_owners, return_probabilities).tolist()
 
+		if self.document_for_regression:
+			new_row = self._observation(1)[2:5].tolist() + self._observation(0)[2:5].tolist() + owner_decisions
+			self.owners_dataframe.loc[len(self.owners_dataframe)] = new_row
+
 		# owner decisions can be as follows:
 		# 0: Hold/Do nothing
 		# 1: Throw away
diff --git a/recommerce/market/circular/circular_vendors.py b/recommerce/market/circular/circular_vendors.py
index d651f7e4..3c78a9cc 100644
--- a/recommerce/market/circular/circular_vendors.py
+++ b/recommerce/market/circular/circular_vendors.py
@@ -1,9 +1,15 @@
+import os
+import random
 from abc import ABC
 from statistics import median
 
 import numpy as np
+import pandas as pd
 from attrdict import AttrDict
+from sklearn.linear_model import LinearRegression
+from tqdm import tqdm
 
+from recommerce.configuration.path_manager import PathManager
 from recommerce.market.vendors import Agent, FixedPriceAgent, HumanPlayer, RuleBasedAgent
 
 
@@ -176,6 +182,83 @@ def policy(self, observation, *_) -> tuple:
 		return (self._clamp_price(price_refurbished), self._clamp_price(price_new), self._clamp_price(rebuy_price))
 
 
+class RuleBasedCERebuyAgentSampleCollector(RuleBasedAgent, CircularAgent):
+	"""
+	This vendor's policy is aiming to succeed by undercutting the competitor's prices.
+	"""
+	def __init__(self, config_market: AttrDict, name='', continuous_action_space: bool = False):
+		self.continuous_action_space = continuous_action_space
+		self.name = name if name != '' else type(self).__name__
+		self.config_market = config_market
+
+	def policy(self, observation, *_) -> tuple:
+		assert isinstance(observation, np.ndarray), 'observation must be a np.ndarray'
+		# TODO: find a proper way asserting the length of observation (as implemented in AC & QLearning via passing marketplace)
+
+		# in_circulation is ignored
+		own_storage = observation[1].item() if self.config_market.common_state_visibility else observation[0].item()
+		competitors_refurbished_prices, competitors_new_prices, competitors_rebuy_prices = self._get_competitor_prices(observation, True)
+
+		price_new = max(min(competitors_new_prices) - 1, self.config_market.production_price + 1)
+		# competitor's storage is ignored
+		if own_storage < self.config_market.competitor_lowest_storage_level + random.randint(-3, 3):
+			# fill up the storage immediately
+			price_refurbished = min(competitors_refurbished_prices) + random.randint(0, 3)
+			rebuy_price = max(min(competitors_rebuy_prices) + random.randint(0, 3), 2 if random.random() < 0.8 else 0)
+		elif own_storage < self.config_market.competitor_ok_storage_level + random.randint(-3, 3):
+			# storage content is ok
+			rebuy_price = max(min(competitors_rebuy_prices) - random.randint(0, 2), 0.25)
+			price_refurbished = max(min(competitors_refurbished_prices) - random.randint(0, 2), rebuy_price + random.randint(0, 2))
+		else:
+			# storage too full, we need to get rid of some refurbished products
+			rebuy_price = max(min(competitors_rebuy_prices) - random.randint(1, 4), 0)
+			price_refurbished = max(round(np.quantile(competitors_refurbished_prices, 0.75)) - random.randint(1, 4),
+				rebuy_price + random.randint(0, 3))
+
+		return np.array((self._clamp_price(price_refurbished), self._clamp_price(price_new), self._clamp_price(rebuy_price))
+			if random.random() < 0.8 else (random.randint(0, 10), random.randint(0, 10), random.randint(0, 10)))
+
+
+class LinearRegressionCERebuyAgent(RuleBasedAgent, CircularAgent):
+	"""
+	This vendor fits a linear regression on recorded competitor reactions; its policy does not return prices yet.
+	"""
+	def __init__(self, config_market: AttrDict, name='', continuous_action_space: bool = False):
+		self.continuous_action_space = continuous_action_space
+		self.name = name if name != '' else type(self).__name__
+		competitor_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'competitor_reaction_dataframe.xlsx'))[:-5000]
+		X = competitor_dataframe.iloc[:, 0:3].values
+		X_dash_list = []
+		print('Now I start to construct the binary features')
+		for price_threshhold in range(10):
+			# iterate through the columns
+			for i_feature, column in tqdm(enumerate(X.T)):
+				column_values = np.where(column > price_threshhold, 1, 0)
+				# append the new column to X
+				X_dash_list.append(column_values.reshape(-1, 1))
+
+		X_dash = np.concatenate(X_dash_list, axis=1)
+		X = np.concatenate((X, X_dash), axis=1)
+		print(X)
+		print(X.shape)
+		# define Y as the last 3 columns
+		Y = competitor_dataframe.iloc[:, 3:6].values
+		self.regressor = LinearRegression()
+		self.regressor.fit(X, Y)
+		print(f'LinearRegressionCERebuyAgent: {self.regressor.score(X, Y)}')
+
+		predictions = self.regressor.predict(X)
+		print(predictions)
+		print(predictions.shape)
+		competitor_dataframe['predicted_refurbished_price'] = predictions[:, 0]
+		competitor_dataframe['predicted_new_price'] = predictions[:, 1]
+		competitor_dataframe['predicted_rebuy_price'] = predictions[:, 2]
+		competitor_dataframe.to_excel(os.path.join(PathManager.data_path, 'competitor_reaction_dataframe_predicted.xlsx'), index=False)
+
+	def policy(self, observation, *_) -> tuple:
+		assert isinstance(observation, np.ndarray), 'observation must be a np.ndarray'
+
+
 class RuleBasedCERebuyAgentStorageMinimizer(RuleBasedAgent, CircularAgent):
 	"""
 	This vendor's policy reacts to the competitors' prices and minimizes the usage of storage.
@@ -206,3 +289,7 @@ def policy(self, observation, *_) -> tuple:
 			price_refurbished = int(np.quantile(competitors_refurbished_prices, 0.25))
 
 		return (self._clamp_price(price_refurbished), self._clamp_price(price_new), self._clamp_price(rebuy_price))
+
+
+if __name__ == '__main__':
+	LinearRegressionCERebuyAgent(None)
diff --git a/recommerce/market/owner.py b/recommerce/market/owner.py
index ab32cf8d..fbdde830 100644
--- a/recommerce/market/owner.py
+++ b/recommerce/market/owner.py
@@ -1,8 +1,12 @@
+import os
 from abc import ABC, abstractmethod
 
 import numpy as np
+import pandas as pd
+from sklearn.linear_model import LinearRegression
 
 import recommerce.configuration.utils as ut
+from recommerce.configuration.path_manager import PathManager
 
 
 class Owner(ABC):
@@ -109,3 +113,39 @@ def generate_return_probabilities_from_offer(self, common_state, vendor_specific
 		discard_preference = lowest_purchase_offer - best_rebuy_price
 
 		return ut.softmax(np.array([holding_preference, discard_preference] + return_preferences))
+
+
+class LinearRegressionOwner(Owner):
+	def __init__(self):
+		owner_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'owners_dataframe.xlsx'))
+		X = owner_dataframe.iloc[:, 0:6].values
+		X_dash_list = []
+		print('Now I start to construct the binary features')
+		for price_threshhold in range(10):
+			# iterate through the columns
+			for i_feature, column in enumerate(X.T):
+				column_values = np.where(column > price_threshhold, 1, 0)
+				# append the new column to X
+				X_dash_list.append(column_values.reshape(-1, 1))
+		X_dash = np.concatenate(X_dash_list, axis=1)
+		X = np.concatenate((X, X_dash), axis=1)
+		Y = owner_dataframe.iloc[:, 6:10].values
+		self.regressor = LinearRegression()
+		self.regressor.fit(X, Y)
+		print(f'LinearRegressionOwner: R^2 = {self.regressor.score(X, Y)}')
+
+		# add the predictions to the dataframe as new columns
+		predictions = self.regressor.predict(X)
+		owner_dataframe['predicted holding'] = predictions[:, 0]
+		owner_dataframe['predicted throw away'] = predictions[:, 1]
+		owner_dataframe['predicted agent rebuy'] = predictions[:, 2]
+		owner_dataframe['predicted competitor rebuy'] = predictions[:, 3]
+		# save the dataframe to a new excel file
+		owner_dataframe.to_excel(os.path.join(PathManager.data_path, 'owners_dataframe_predicted.xlsx'), index=False)
+
+	def generate_return_probabilities_from_offer(self, common_state, vendor_specific_state, vendor_actions) -> np.array:
+		return np.array([0, 0, 0, 0])
+
+
+if __name__ == '__main__':
+	LinearRegressionOwner()
diff --git a/recommerce/market/samples_generation.py b/recommerce/market/samples_generation.py
new file mode 100644
index 00000000..3eb06952
--- /dev/null
+++ b/recommerce/market/samples_generation.py
@@ -0,0 +1,30 @@
+import os
+
+from tqdm import tqdm
+
+from recommerce.configuration.hyperparameter_config import HyperparameterConfigLoader
+from recommerce.configuration.path_manager import PathManager
+from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly
+from recommerce.market.circular.circular_vendors import RuleBasedCERebuyAgentSampleCollector
+from recommerce.monitoring.exampleprinter import ExamplePrinter
+
+if __name__ == '__main__':
+	# Let the sample collector agent act on a duopoly market that documents its samples for the regression datasets.
+	market_config = HyperparameterConfigLoader.load('market_config', CircularEconomyRebuyPriceDuopoly)
+	printer = ExamplePrinter(market_config)
+	sample_collector = RuleBasedCERebuyAgentSampleCollector(market_config, 'Sample Collector', True)
+	recording_market = CircularEconomyRebuyPriceDuopoly(market_config, True, document_for_regression=True)
+	printer.setup_exampleprinter(recording_market, sample_collector)
+	for _ in tqdm(range(20)):
+		printer.run_example(save_diagrams=False)
+
+	# Each entry: status message, dataframe attribute of the market, name of the excel file in the results folder.
+	exports = (
+		('Saving customers dataframe...', 'customers_dataframe', 'customers_dataframe_generated.xlsx'),
+		('Saving owners dataframe...', 'owners_dataframe', 'owners_dataframe_generated.xlsx'),
+		('Saving reaction dataframe...', 'competitor_reaction_dataframe', 'competitor_reaction_dataframe_generated.xlsx'),
+	)
+	for message, dataframe_attribute, file_name in exports:
+		print(message)
+		dataframe = getattr(recording_market, dataframe_attribute)
+		dataframe.to_excel(os.path.join(PathManager.results_path, file_name), index=False)
diff --git a/recommerce/market/sim_market.py b/recommerce/market/sim_market.py
index 708403ab..64897bd2 100644
--- a/recommerce/market/sim_market.py
+++ b/recommerce/market/sim_market.py
@@ -3,6 +3,7 @@
 
 import gym
 import numpy as np
+import pandas as pd
 from attrdict import AttrDict
 
 from recommerce.configuration.json_configurable import JSONConfigurable
@@ -45,7 +46,12 @@ def get_possible_rl_agents() -> list:
 	def get_competitor_classes() -> list:
 		raise NotImplementedError
 
-	def __init__(self, config: AttrDict, support_continuous_action_space: bool = False, competitors: list = None) -> None:
+	def __init__(
+			self,
+			config: AttrDict,
+			support_continuous_action_space: bool = False,
+			competitors: list = None,
+			document_for_regression: bool = False) -> None:
 		"""
 		Initialize a SimMarket instance.
 		Set up needed values such as competitors and action/observation-space and reset the environment.
@@ -70,6 +76,33 @@ def __init__(self, config: AttrDict, support_continuous_action_space: bool = Fal
 			'You cannot use the mixed profit and difference reward in a monopoly market'
 		self.reset()
 
+		self.document_for_regression = document_for_regression
+		if self.document_for_regression:
+			pandas_state_columns = [
+				'own price refurbished',
+				'own price new',
+				'own rebuy price',
+				'competitor price refurbished',
+				'competitor price new',
+				'competitor rebuy price',
+			]
+			purchases_pandas_state_columns = [
+				'buy nothing',
+				'buy new agent',
+				'buy refurbished agent',
+				'buy new competitor',
+				'buy refurbished competitor',
+			]
+			owner_pandas_state_columns = [
+				'product holding',
+				'product throw away',
+				'rebuy agent',
+				'rebuy competitor',
+			]
+			self.customers_dataframe = pd.DataFrame(columns=pandas_state_columns + purchases_pandas_state_columns)
+			self.owners_dataframe = pd.DataFrame(columns=pandas_state_columns + owner_pandas_state_columns)
+			self.competitor_reaction_dataframe = pd.DataFrame(columns=pandas_state_columns)
+
 	def _get_number_of_vendors(self) -> int:
 		"""
 		Return the number of competitors plus the agent.
@@ -122,6 +155,9 @@ def _simulate_customers(self, profits, number_of_customers) -> None:
 		assert self._is_probability_distribution_fitting_exactly(probability_distribution)
 
 		customer_decisions = np.random.multinomial(number_of_customers, probability_distribution).tolist()
+		if self.document_for_regression:
+			new_row = self._observation(1)[2:5].tolist() + self._observation(0)[2:5].tolist() + customer_decisions
+			self.customers_dataframe.loc[len(self.customers_dataframe)] = new_row
 		self._output_dict['customer/buy_nothing'] += customer_decisions[0]
 		for seller, frequency in enumerate(customer_decisions):
 			if seller == 0 or frequency == 0:
@@ -172,6 +208,10 @@ def step(self, action) -> Tuple[np.array, float, bool, dict]:
 					f'This vendor does not deliver a suitable action, action_space: {self.action_space}, action: {action_competitor_i}'
 				self.vendor_actions[i + 1] = action_competitor_i
 
+		if self.document_for_regression:
+			self.competitor_reaction_dataframe.loc[len(self.competitor_reaction_dataframe)] = \
+				self._observation(1)[2:5].tolist() + self._observation(0)[2:5].tolist()
+
 		self._consider_storage_costs(profits)
 
 		self._ensure_output_dict_has('profits/all', profits)
diff --git a/recommerce/monitoring/exampleprinter.py b/recommerce/monitoring/exampleprinter.py
index 827189e9..fcdd80db 100644
--- a/recommerce/monitoring/exampleprinter.py
+++ b/recommerce/monitoring/exampleprinter.py
@@ -3,6 +3,8 @@
 import signal
 import sys
 import time
+# turn all warnings into errors
+import warnings
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -21,6 +23,8 @@
 from recommerce.monitoring.svg_manipulation import SVGManipulator
 from recommerce.rl.q_learning.q_learning_agent import QLearningAgent
 
+warnings.filterwarnings('error')
+
 
 class ExamplePrinter():
 
@@ -71,12 +75,12 @@ def _rearrange_info_dicts(self, info_dicts: list, evaluation_left_bound, evaluat
 				info_dict[key].append(info[key])
 		return info_dict
 
-	def run_example(self, save_lineplots=False, evaluation_left_bound=450, evaluation_right_bound=500) -> int:
+	def run_example(self, save_diagrams=True, evaluation_left_bound=450, evaluation_right_bound=500) -> int:
 		"""
 		Run a specified marketplace with a (pre-trained, if RL) agent and record various statistics using TensorBoard.
 
 		Args:
-			save_lineplots (bool, optional): Whether to save lineplots of the market's performance.
+			save_diagrams (bool, optional): Whether to save lineplots of the market's performance.
 
 		Returns:
 			int: The profit made.
@@ -91,11 +95,11 @@ def run_example(self, save_lineplots=False, evaluation_left_bound=450, evaluatio
 		writer = SummaryWriter(log_dir=os.path.join(PathManager.results_path, 'runs', signature))
 		os.makedirs(os.path.join(PathManager.results_path, 'exampleprinter', signature))
 
-		if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly):
+		if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly) and save_diagrams:
 			svg_manipulator = SVGManipulator(signature)
 		cumulative_dict = None
 
-		if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_lineplots:
+		if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_diagrams:
 			price_used = [[] for _ in range(self.marketplace._number_of_vendors)]
 			price_news = [[] for _ in range(self.marketplace._number_of_vendors)]
 			price_rebuy = [[] for _ in range(self.marketplace._number_of_vendors)]
@@ -107,8 +111,8 @@ def run_example(self, save_lineplots=False, evaluation_left_bound=450, evaluatio
 		with torch.no_grad():
 			while not is_done:
 				action = self.agent.policy(state)
-				print(state)
-				print(action)
+				# print(state)
+				# print(action)
 				state, reward, is_done, logdict = self.marketplace.step(action)
 				info_dicts.append(logdict)
 				if cumulative_dict is not None:
@@ -118,24 +122,24 @@ def run_example(self, save_lineplots=False, evaluation_left_bound=450, evaluatio
 				ut.write_dict_to_tensorboard(writer, logdict, counter)
 				ut.write_dict_to_tensorboard(writer, cumulative_dict, counter, is_cumulative=True,
 					episode_length=self.config_market.episode_length)
-				if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly):
+				if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly) and save_diagrams:
 					ut.write_content_of_dict_to_overview_svg(svg_manipulator, counter, logdict, cumulative_dict, self.config_market)
 				our_profit += reward
 				counter += 1
-				if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_lineplots:
+				if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_diagrams:
 					for i in range(self.marketplace._number_of_vendors):
 						price_used[i].append(logdict['actions/price_refurbished'][f'vendor_{i}'])
 						price_news[i].append(logdict['actions/price_new'][f'vendor_{i}'])
 						price_rebuy[i].append(logdict['actions/price_rebuy'][f'vendor_{i}'])
 						in_storages[i].append(logdict['state/in_storage'][f'vendor_{i}'])
 					in_circulations.append(logdict['state/in_circulation'])
-				if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly):
+				if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly) and save_diagrams:
 					svg_manipulator.save_overview_svg(filename=('MarketOverview_%.3d' % counter))
 
-		if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly):
+		if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly) and save_diagrams:
 			svg_manipulator.to_html()
 
-		if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_lineplots:
+		if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_diagrams:
 			self.save_step_diagrams(price_used, price_news, price_rebuy, in_storages, in_circulations, signature,
 				evaluation_left_bound, evaluation_right_bound)
 
diff --git a/recommerce/rl/ablation_study.py b/recommerce/rl/ablation_study.py
index 32a7927d..83c489e1 100644
--- a/recommerce/rl/ablation_study.py
+++ b/recommerce/rl/ablation_study.py
@@ -78,7 +78,7 @@ def run_training_session(market_class, config_market, agent_class, config_rl, tr
     exampleprinter = ExamplePrinter(config_market)
     marketplace = market_class(config_market, support_continuous_action_space=True)
     exampleprinter.setup_exampleprinter(marketplace, agent)
-    profit, info_sequences = exampleprinter.run_example(save_lineplots=True)
+    profit, info_sequences = exampleprinter.run_example(save_diagrams=True)
     pipe_to_parent.send(info_sequences)
 
 
diff --git a/recommerce/rl/stable_baselines/stable_baselines_model.py b/recommerce/rl/stable_baselines/stable_baselines_model.py
index 10c557b8..246d1146 100644
--- a/recommerce/rl/stable_baselines/stable_baselines_model.py
+++ b/recommerce/rl/stable_baselines/stable_baselines_model.py
@@ -55,17 +55,32 @@ def set_marketplace(self, new_marketplace: SimMarket):
 
 	def train_agent(self, training_steps=100001, iteration_length=500, analyze_after_training=True):
 		callback = RecommerceCallback(
-			type(self), self.marketplace, self.config_market, self.config_rl, training_steps=training_steps, iteration_length=iteration_length,
-			signature=self.name, analyze_after_training=analyze_after_training)
+			type(self), self.marketplace, self.config_market, self.config_rl, training_steps=training_steps,
+			iteration_length=iteration_length, signature=self.name, analyze_after_training=analyze_after_training)
 		self.model.learn(training_steps, callback=callback)
+		self._save_regression_dataframes()
 		return callback.watcher
 
+	def _save_regression_dataframes(self) -> None:
+		"""
+		Write the sample dataframes of the marketplace to excel files in the results folder, one file per dataframe.
+
+		The marketplace only creates these dataframes if it was built with `document_for_regression=True`, otherwise nothing is written.
+		"""
+		if not getattr(self.marketplace, 'document_for_regression', False):
+			return
+		for dataframe_name in ('customers_dataframe', 'owners_dataframe', 'competitor_reaction_dataframe'):
+			dataframe = getattr(self.marketplace, dataframe_name)
+			dataframe.to_excel(os.path.join(PathManager.results_path, f'{dataframe_name}_{self.name}.xlsx'))
+
 	def train_with_default_eval(self, training_steps=100001):
 		save_path = os.path.join(PathManager.results_path, 'best_model', f'{self.name}')
 		log_path = os.path.join(PathManager.results_path, 'logs', f'{self.name}')
 		os.makedirs(log_path, exist_ok=True)
-		callback = EvalCallback(Monitor(self.marketplace, filename=log_path), best_model_save_path=save_path, log_path=log_path, render=False)
+		callback = EvalCallback(Monitor(self.marketplace, filename=log_path),
+			best_model_save_path=save_path, log_path=log_path, render=False)
 		self.model.learn(training_steps, callback=callback)
+		self._save_regression_dataframes()
 		return save_path
 
 	@staticmethod
diff --git a/recommerce/rl/training_scenario.py b/recommerce/rl/training_scenario.py
index 7d0b36ce..e6882f73 100644
--- a/recommerce/rl/training_scenario.py
+++ b/recommerce/rl/training_scenario.py
@@ -133,7 +133,7 @@ def train_stable_baselines_a2c():
 	StableBaselinesA2C(
 		config_market=config_market,
 		config_rl=config_rl,
-		marketplace=circular_market.CircularEconomyRebuyPriceDuopoly(config_market, True)).train_agent(100000)
+		marketplace=circular_market.CircularEconomyRebuyPriceDuopoly(config_market, True)).train_agent(200000)
 
 
 def train_stable_baselines_ppo():
@@ -143,7 +143,7 @@ def train_stable_baselines_ppo():
 	StableBaselinesPPO(
 		config_market=config_market,
 		config_rl=config_rl,
-		marketplace=used_marketplace(config_market, True)).train_agent(1000000)
+		marketplace=used_marketplace(config_market, True)).train_agent(200000)
 
 
 def train_stable_baselines_sac():
@@ -202,4 +202,4 @@ def main():
 	# Make sure a valid datapath is set
 	PathManager.manage_user_path()
 
-	main()
+	train_stable_baselines_a2c()
diff --git a/setup.cfg b/setup.cfg
index 02038ed7..ddd3722d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -30,6 +30,7 @@ install_requires =
 	scipy>=1.8.0
 	attrdict>=2.0.1
     openpyxl>=3.1.2
+    scikit-learn>=1.2.2
 python_requires = >=3.8
 
 [options.extras_require]

From 42d482dadd45b279f5191a5a35cd27710f44a2a3 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Mon, 20 Mar 2023 09:45:26 +0100
Subject: [PATCH 08/20] market with learned customer

---
 .../market/circular/circular_customers.py     | 68 +++++++++++++------
 .../market/circular/circular_sim_market.py    |  4 +-
 recommerce/rl/training_scenario.py            |  2 +-
 3 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/recommerce/market/circular/circular_customers.py b/recommerce/market/circular/circular_customers.py
index c2df5e41..7589fad1 100644
--- a/recommerce/market/circular/circular_customers.py
+++ b/recommerce/market/circular/circular_customers.py
@@ -40,13 +40,8 @@ def generate_purchase_probabilities_from_offer(self, market_config, common_state
 
 
 class LinearRegressionCustomer(Customer):
-	def __init__(self) -> None:
-		customers_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'customers_dataframe.xlsx'))
-		customers_dataframe = customers_dataframe.iloc[-50000:, :]
-		print('Dataset read')
-		X = customers_dataframe.iloc[:, 0:6].values
+	def create_x_with_binary_features(self, X):
 		X_dash_list = []
-		print('Now I start to construct the binary features')
 		for price_threshhold in range(10):
 			# iterate throw the columns
 			for i_feature, column in enumerate(X.T):
@@ -54,23 +49,54 @@ def __init__(self) -> None:
 				# append the new column to X
 				X_dash_list.append(column_values.reshape(-1, 1))
 		X_dash = np.concatenate(X_dash_list, axis=1)
-		X = np.concatenate((X, X_dash), axis=1)
-		Y = customers_dataframe.iloc[:, 6:11].values
-		self.regressor = LinearRegression()
-		self.regressor.fit(X, Y)
-		print(f'LinearRegressionCustomer: R^2 = {self.regressor.score(X, Y)}')
-
-		prediction = self.regressor.predict(X)
-		print(f'LinearRegressionCustomer: prediction = {prediction}')
-		customers_dataframe['buy nothing predicted'] = prediction[:, 0]
-		customers_dataframe['buy new agent predicted'] = prediction[:, 1]
-		customers_dataframe['buy refurbished agent predicted'] = prediction[:, 2]
-		customers_dataframe['buy new competitor predicted'] = prediction[:, 3]
-		customers_dataframe['buy refurbished competitor predicted'] = prediction[:, 4]
-		customers_dataframe.to_excel(os.path.join(PathManager.data_path, 'customers_dataframe_predicted.xlsx'), index=False)
+		return np.concatenate((X, X_dash), axis=1)
+
+	def __init__(self) -> None:
+		if not hasattr(LinearRegressionCustomer, 'regressor'):
+			customers_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'customers_dataframe.xlsx'))
+			print('Dataset read')
+			X = customers_dataframe.iloc[:, 0:6].values
+			# Swap the first three columns and the last three columns. Write it to X_swapped
+			X_swapped = np.concatenate((X[:, 3:6], X[:, 0:3]), axis=1)
+			# Concatenate X and X_swapped to X
+			X = np.concatenate((X, X_swapped), axis=0)
+			X = self.create_x_with_binary_features(X)
+			Y = customers_dataframe.iloc[:, 6:11].values
+			# Swap columns 1, 2 and 3, 4. Write it to Y_swapped
+			Y_swapped = np.concatenate((Y[:, 0].reshape(-1, 1), Y[:, 1:3], Y[:, 3:5]), axis=1)
+			# Concatenate Y and Y_swapped to Y
+			Y = np.concatenate((Y, Y_swapped), axis=0)
+			print(X.shape)
+			print(Y.shape)
+			LinearRegressionCustomer.regressor = LinearRegression()
+			LinearRegressionCustomer.regressor.fit(X, Y)
+			print(f'LinearRegressionCustomer: R^2 = {self.regressor.score(X, Y)}')
+
+			prediction = LinearRegressionCustomer.regressor.predict(X)
+			print(f'LinearRegressionCustomer: prediction = {prediction}')
+			# customers_dataframe['buy nothing predicted'] = prediction[:, 0]
+			# customers_dataframe['buy new agent predicted'] = prediction[:, 1]
+			# customers_dataframe['buy refurbished agent predicted'] = prediction[:, 2]
+			# customers_dataframe['buy new competitor predicted'] = prediction[:, 3]
+			# customers_dataframe['buy refurbished competitor predicted'] = prediction[:, 4]
+			# customers_dataframe.to_excel(os.path.join(PathManager.data_path, 'customers_dataframe_predicted.xlsx'), index=False)
 
 	def generate_purchase_probabilities_from_offer(self, market_config, common_state, vendor_specific_state, vendor_actions) -> np.array:
-		return np.array([0, 0, 0, 0])
+		assert isinstance(common_state, np.ndarray), 'common_state must be a np.ndarray'
+		assert isinstance(vendor_specific_state, list), 'vendor_specific_state must be a list'
+		assert isinstance(vendor_actions, list), 'vendor_actions must be a list'
+		assert len(vendor_specific_state) == len(vendor_actions), \
+			'Both the vendor_specific_state and vendor_actions contain one element per vendor. So they must have the same length.'
+		assert len(vendor_specific_state) > 0, 'there must be at least one vendor.'
+
+		input_array = list(vendor_actions[0]) + list(vendor_actions[1])
+		input_array = self.create_x_with_binary_features(np.array(input_array).reshape(1, -1))
+		# print(input_array)
+		prediction = LinearRegressionCustomer.regressor.predict(input_array)[0]
+		prediction = np.where(prediction < 0, 0, prediction)
+		prediction = prediction / np.sum(prediction)
+		# print(prediction)
+		return prediction
 
 
 if __name__ == '__main__':
diff --git a/recommerce/market/circular/circular_sim_market.py b/recommerce/market/circular/circular_sim_market.py
index d11873a8..5851bda5 100644
--- a/recommerce/market/circular/circular_sim_market.py
+++ b/recommerce/market/circular/circular_sim_market.py
@@ -7,7 +7,7 @@
 import recommerce.market.circular.circular_vendors as circular_vendors
 import recommerce.market.owner as owner
 from recommerce.configuration.common_rules import between_zero_one_rule, greater_zero_even_rule, greater_zero_rule, non_negative_rule
-from recommerce.market.circular.circular_customers import CustomerCircular
+from recommerce.market.circular.circular_customers import LinearRegressionCustomer
 from recommerce.market.customer import Customer
 from recommerce.market.owner import Owner
 from recommerce.market.sim_market import SimMarket
@@ -83,7 +83,7 @@ def _reset_vendor_actions(self) -> tuple:
 		return (self.config.production_price, self.config.production_price + 1)
 
 	def _choose_customer(self) -> Customer:
-		return CustomerCircular()
+		return LinearRegressionCustomer()
 
 	def _choose_owner(self) -> Owner:
 		return owner.UniformDistributionOwner()
diff --git a/recommerce/rl/training_scenario.py b/recommerce/rl/training_scenario.py
index e6882f73..b1b06b4e 100644
--- a/recommerce/rl/training_scenario.py
+++ b/recommerce/rl/training_scenario.py
@@ -202,4 +202,4 @@ def main():
 	# Make sure a valid datapath is set
 	PathManager.manage_user_path()
 
-	train_stable_baselines_a2c()
+	train_stable_baselines_ppo()

From f35dcaa592e9145f944f7f3f2b522c52aa111da6 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Mon, 20 Mar 2023 13:27:05 +0100
Subject: [PATCH 09/20] market components developed

---
 .../market/circular/circular_customers.py     |  8 +--
 .../market/circular/circular_sim_market.py    |  4 +-
 .../market/circular/circular_vendors.py       | 52 +++++++++--------
 recommerce/market/owner.py                    | 57 ++++++++++++-------
 4 files changed, 73 insertions(+), 48 deletions(-)

diff --git a/recommerce/market/circular/circular_customers.py b/recommerce/market/circular/circular_customers.py
index 7589fad1..24df37a2 100644
--- a/recommerce/market/circular/circular_customers.py
+++ b/recommerce/market/circular/circular_customers.py
@@ -61,19 +61,18 @@ def __init__(self) -> None:
 			# Concatenate X and X_swapped to X
 			X = np.concatenate((X, X_swapped), axis=0)
 			X = self.create_x_with_binary_features(X)
+			print(X.shape)
 			Y = customers_dataframe.iloc[:, 6:11].values
 			# Swap columns 1, 2 and 3, 4. Write it to Y_swapped
 			Y_swapped = np.concatenate((Y[:, 0].reshape(-1, 1), Y[:, 1:3], Y[:, 3:5]), axis=1)
 			# Concatenate Y and Y_swapped to Y
 			Y = np.concatenate((Y, Y_swapped), axis=0)
-			print(X.shape)
-			print(Y.shape)
 			LinearRegressionCustomer.regressor = LinearRegression()
 			LinearRegressionCustomer.regressor.fit(X, Y)
 			print(f'LinearRegressionCustomer: R^2 = {self.regressor.score(X, Y)}')
 
-			prediction = LinearRegressionCustomer.regressor.predict(X)
-			print(f'LinearRegressionCustomer: prediction = {prediction}')
+			# prediction = LinearRegressionCustomer.regressor.predict(X)
+			# print(f'LinearRegressionCustomer: prediction = {prediction}')
 			# customers_dataframe['buy nothing predicted'] = prediction[:, 0]
 			# customers_dataframe['buy new agent predicted'] = prediction[:, 1]
 			# customers_dataframe['buy refurbished agent predicted'] = prediction[:, 2]
@@ -91,6 +90,7 @@ def generate_purchase_probabilities_from_offer(self, market_config, common_state
 
 		input_array = list(vendor_actions[0]) + list(vendor_actions[1])
 		input_array = self.create_x_with_binary_features(np.array(input_array).reshape(1, -1))
+		# input_array = np.concatenate((np.array(input_array).reshape(1, -1), input_binary), axis=1)
 		# print(input_array)
 		prediction = LinearRegressionCustomer.regressor.predict(input_array)[0]
 		prediction = np.where(prediction < 0, 0, prediction)
diff --git a/recommerce/market/circular/circular_sim_market.py b/recommerce/market/circular/circular_sim_market.py
index 5851bda5..f4606f20 100644
--- a/recommerce/market/circular/circular_sim_market.py
+++ b/recommerce/market/circular/circular_sim_market.py
@@ -318,7 +318,7 @@ def _reset_vendor_actions(self) -> tuple:
 		return (self.config.production_price, self.config.production_price + 1, 1)
 
 	def _choose_owner(self) -> Owner:
-		return owner.OwnerRebuy()
+		return owner.LinearRegressionOwner()
 
 	def _initialize_output_dict(self) -> None:
 		"""
@@ -359,7 +359,7 @@ def get_num_competitors() -> int:
 		return 1
 
 	def _get_competitor_list(self) -> list:
-		return [circular_vendors.RuleBasedCERebuyAgentCompetitive(config_market=self.config,
+		return [circular_vendors.LinearRegressionCERebuyAgent(config_market=self.config,
 			continuous_action_space=self.support_continuous_action_space)]
 
 
diff --git a/recommerce/market/circular/circular_vendors.py b/recommerce/market/circular/circular_vendors.py
index 3c78a9cc..acbc790e 100644
--- a/recommerce/market/circular/circular_vendors.py
+++ b/recommerce/market/circular/circular_vendors.py
@@ -223,40 +223,46 @@ class LinearRegressionCERebuyAgent(RuleBasedAgent, CircularAgent):
 	"""
 	This vendor's policy is aiming to succeed by undercutting the competitor's prices.
 	"""
-	def __init__(self, config_market: AttrDict, name='', continuous_action_space: bool = False):
-		self.continuous_action_space = continuous_action_space
-		self.name = name if name != '' else type(self).__name__
-		competitor_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'competitor_reaction_dataframe.xlsx'))[:-5000]
-		X = competitor_dataframe.iloc[:, 0:3].values
+	def create_x_with_binary_features(self, X):
 		X_dash_list = []
-		print('Now I start to construct the binary features')
 		for price_threshhold in range(10):
 			# iterate throw the columns
 			for i_feature, column in tqdm(enumerate(X.T)):
 				column_values = np.where(column > price_threshhold, 1, 0)
 				# append the new column to X
 				X_dash_list.append(column_values.reshape(-1, 1))
-
 		X_dash = np.concatenate(X_dash_list, axis=1)
-		X = np.concatenate((X, X_dash), axis=1)
-		print(X)
-		print(X.shape)
-		# define Y as the last 3 columns
-		Y = competitor_dataframe.iloc[:, 3:6].values
-		self.regressor = LinearRegression()
-		self.regressor.fit(X, Y)
-		print(f'LinearRegressionCERebuyAgent: {self.regressor.score(X, Y)}')
-
-		predictions = self.regressor.predict(X)
-		print(predictions)
-		print(predictions.shape)
-		competitor_dataframe['predicted_refurbished_price'] = predictions[:, 0]
-		competitor_dataframe['predicted_new_price'] = predictions[:, 1]
-		competitor_dataframe['predicted_rebuy_price'] = predictions[:, 2]
-		competitor_dataframe.to_excel(os.path.join(PathManager.data_path, 'competitor_reaction_dataframe_predicted.xlsx'), index=False)
+		return np.concatenate((X, X_dash), axis=1)
+
+	def __init__(self, config_market: AttrDict, name='', continuous_action_space: bool = False):
+		self.continuous_action_space = continuous_action_space
+		self.name = name if name != '' else type(self).__name__
+		if not hasattr(LinearRegressionCERebuyAgent, 'regressor'):
+			competitor_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'competitor_reaction_dataframe.xlsx'))[:-5000]
+			X = competitor_dataframe.iloc[:, 0:3].values
+
+			X = self.create_x_with_binary_features(X)
+			# define Y as the last 3 columns
+			Y = competitor_dataframe.iloc[:, 3:6].values
+			LinearRegressionCERebuyAgent.regressor = LinearRegression()
+			LinearRegressionCERebuyAgent.regressor.fit(X, Y)
+			print(f'LinearRegressionCERebuyAgent: {LinearRegressionCERebuyAgent.regressor.score(X, Y)}')
+
+			# predictions = self.regressor.predict(X)
+			# print(predictions)
+			# print(predictions.shape)
+			# competitor_dataframe['predicted_refurbished_price'] = predictions[:, 0]
+			# competitor_dataframe['predicted_new_price'] = predictions[:, 1]
+			# competitor_dataframe['predicted_rebuy_price'] = predictions[:, 2]
+			# competitor_dataframe.to_excel(os.path.join(PathManager.data_path, 'competitor_reaction_dataframe_predicted.xlsx'), index=False)
 
 	def policy(self, observation, *_) -> tuple:
 		assert isinstance(observation, np.ndarray), 'observation must be a np.ndarray'
+		observation = self.create_x_with_binary_features(observation[2:5].reshape(1, -1))
+		prediction = LinearRegressionCERebuyAgent.regressor.predict(observation)
+		# clamp all values of prediction between 0 and 10
+		prediction = np.clip(prediction, 0, 10)
+		return prediction[0]
 
 
 class RuleBasedCERebuyAgentStorageMinimizer(RuleBasedAgent, CircularAgent):
diff --git a/recommerce/market/owner.py b/recommerce/market/owner.py
index fbdde830..33f2bf19 100644
--- a/recommerce/market/owner.py
+++ b/recommerce/market/owner.py
@@ -116,11 +116,8 @@ def generate_return_probabilities_from_offer(self, common_state, vendor_specific
 
 
 class LinearRegressionOwner(Owner):
-	def __init__(self):
-		owner_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'owners_dataframe.xlsx'))
-		X = owner_dataframe.iloc[:, 0:6].values
+	def create_x_with_binary_features(self, X):
 		X_dash_list = []
-		print('Now I start to construct the binary features')
 		for price_threshhold in range(10):
 			# iterate throw the columns
 			for i_feature, column in enumerate(X.T):
@@ -128,23 +125,45 @@ def __init__(self):
 				# append the new column to X
 				X_dash_list.append(column_values.reshape(-1, 1))
 		X_dash = np.concatenate(X_dash_list, axis=1)
-		X = np.concatenate((X, X_dash), axis=1)
-		Y = owner_dataframe.iloc[:, 6:10].values
-		self.regressor = LinearRegression()
-		self.regressor.fit(X, Y)
-		print(f'LinearRegressionOwner: R^2 = {self.regressor.score(X, Y)}')
-
-		# create a new dataframe with predictions and prediction in the column
-		predictions = self.regressor.predict(X)
-		owner_dataframe['predicted holding'] = predictions[:, 0]
-		owner_dataframe['predicted throw away'] = predictions[:, 1]
-		owner_dataframe['predicted agent rebuy'] = predictions[:, 2]
-		owner_dataframe['predicted competitor rebuy'] = predictions[:, 3]
-		# save the dataframe to a new excel file
-		owner_dataframe.to_excel(os.path.join(PathManager.data_path, 'owners_dataframe_predicted.xlsx'), index=False)
+		return np.concatenate((X, X_dash), axis=1)
+
+	def __init__(self):
+		if not hasattr(LinearRegressionOwner, 'regressor'):
+			owner_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'owners_dataframe.xlsx'))
+			X = owner_dataframe.iloc[:, 0:6].values
+			X_swapped = np.concatenate((X[:, 3:6], X[:, 0:3]), axis=1)
+			X = np.concatenate((X, X_swapped), axis=0)
+			X = self.create_x_with_binary_features(X)
+			Y = owner_dataframe.iloc[:, 6:10].values
+			Y_swapped = np.concatenate((Y[:, 0:2], Y[:, 3].reshape(-1, 1), Y[:, 2].reshape(-1, 1)), axis=1)
+			Y = np.concatenate((Y, Y_swapped), axis=0)
+			LinearRegressionOwner.regressor = LinearRegression()
+			LinearRegressionOwner.regressor.fit(X, Y)
+			print(f'LinearRegressionOwner: R^2 = {self.regressor.score(X, Y)}')
+
+			# create a new dataframe with predictions and prediction in the column
+			# predictions = self.regressor.predict(X)
+			# owner_dataframe['predicted holding'] = predictions[:, 0]
+			# owner_dataframe['predicted throw away'] = predictions[:, 1]
+			# owner_dataframe['predicted agent rebuy'] = predictions[:, 2]
+			# owner_dataframe['predicted competitor rebuy'] = predictions[:, 3]
+			# # save the dataframe to a new excel file
+			# owner_dataframe.to_excel(os.path.join(PathManager.data_path, 'owners_dataframe_predicted.xlsx'), index=False)
 
 	def generate_return_probabilities_from_offer(self, common_state, vendor_specific_state, vendor_actions) -> np.array:
-		return np.array([0, 0, 0, 0])
+		assert isinstance(common_state, np.ndarray), 'offers needs to be a ndarray'
+		assert isinstance(vendor_specific_state, list), 'vendor_specific_state must be a list'
+		assert isinstance(vendor_actions, list), 'vendor_actions must be a list'
+		assert len(vendor_specific_state) == len(vendor_actions), \
+			'Both the vendor_specific_state and vendor_actions contain one element per vendor. So they must have the same length.'
+		assert len(vendor_specific_state) > 0, 'there must be at least one vendor.'
+
+		input_array = list(vendor_actions[0]) + list(vendor_actions[1])
+		input_array = self.create_x_with_binary_features(np.array(input_array).reshape(1, -1))
+		prediction = LinearRegressionOwner.regressor.predict(input_array)[0]
+		prediction = np.where(prediction < 0, 0, prediction)
+		prediction = prediction / np.sum(prediction)
+		return prediction
 
 
 if __name__ == '__main__':

From a7097923d6cc723b458cee3a46174986c6c15f58 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Tue, 21 Mar 2023 23:37:03 +0100
Subject: [PATCH 10/20] improvements

---
 .../market/circular/circular_customers.py     |  1 -
 .../market/circular/circular_sim_market.py    | 28 ++++++++++++++++---
 .../market/circular/circular_vendors.py       |  3 +-
 recommerce/market/owner.py                    |  5 +++-
 recommerce/market/sim_market.py               |  4 +--
 recommerce/rl/training_scenario.py            |  6 ++--
 6 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/recommerce/market/circular/circular_customers.py b/recommerce/market/circular/circular_customers.py
index 24df37a2..0e4471e6 100644
--- a/recommerce/market/circular/circular_customers.py
+++ b/recommerce/market/circular/circular_customers.py
@@ -61,7 +61,6 @@ def __init__(self) -> None:
 			# Concatenate X and X_swapped to X
 			X = np.concatenate((X, X_swapped), axis=0)
 			X = self.create_x_with_binary_features(X)
-			print(X.shape)
 			Y = customers_dataframe.iloc[:, 6:11].values
 			# Swap columns 1, 2 and 3, 4. Write it to Y_swapped
 			Y_swapped = np.concatenate((Y[:, 0].reshape(-1, 1), Y[:, 1:3], Y[:, 3:5]), axis=1)
diff --git a/recommerce/market/circular/circular_sim_market.py b/recommerce/market/circular/circular_sim_market.py
index f4606f20..2232cbb0 100644
--- a/recommerce/market/circular/circular_sim_market.py
+++ b/recommerce/market/circular/circular_sim_market.py
@@ -7,7 +7,7 @@
 import recommerce.market.circular.circular_vendors as circular_vendors
 import recommerce.market.owner as owner
 from recommerce.configuration.common_rules import between_zero_one_rule, greater_zero_even_rule, greater_zero_rule, non_negative_rule
-from recommerce.market.circular.circular_customers import LinearRegressionCustomer
+from recommerce.market.circular.circular_customers import CustomerCircular, LinearRegressionCustomer
 from recommerce.market.customer import Customer
 from recommerce.market.owner import Owner
 from recommerce.market.sim_market import SimMarket
@@ -83,7 +83,7 @@ def _reset_vendor_actions(self) -> tuple:
 		return (self.config.production_price, self.config.production_price + 1)
 
 	def _choose_customer(self) -> Customer:
-		return LinearRegressionCustomer()
+		return CustomerCircular()
 
 	def _choose_owner(self) -> Owner:
 		return owner.UniformDistributionOwner()
@@ -318,7 +318,7 @@ def _reset_vendor_actions(self) -> tuple:
 		return (self.config.production_price, self.config.production_price + 1, 1)
 
 	def _choose_owner(self) -> Owner:
-		return owner.LinearRegressionOwner()
+		return owner.OwnerRebuy()
 
 	def _initialize_output_dict(self) -> None:
 		"""
@@ -359,10 +359,30 @@ def get_num_competitors() -> int:
 		return 1
 
 	def _get_competitor_list(self) -> list:
-		return [circular_vendors.LinearRegressionCERebuyAgent(config_market=self.config,
+		return [circular_vendors.RuleBasedCERebuyAgentCompetitive(config_market=self.config,
 			continuous_action_space=self.support_continuous_action_space)]
 
 
+class CircularEconomyRebuyPriceDuopolyFitted(CircularEconomyRebuyPrice):
+	"""
+	This is a circular economy with rebuy price, so the vendors buy back their products from the customers.
+	There are two vendors.
+	"""
+	@staticmethod
+	def get_num_competitors() -> int:
+		return 1
+
+	def _get_competitor_list(self) -> list:
+		return [circular_vendors.RuleBasedCERebuyAgentCompetitive(config_market=self.config,
+			continuous_action_space=self.support_continuous_action_space)]
+
+	def _choose_customer(self) -> Customer:
+		return LinearRegressionCustomer()
+
+	# def _choose_owner(self) -> Owner:
+	# 	return owner.LinearRegressionOwner()
+
+
 class CircularEconomyRebuyPriceOligopoly(CircularEconomyRebuyPrice):
 	"""
 	This is a circular economy with rebuy price, so the vendors buy back their products from the customers.
diff --git a/recommerce/market/circular/circular_vendors.py b/recommerce/market/circular/circular_vendors.py
index acbc790e..cc4c548c 100644
--- a/recommerce/market/circular/circular_vendors.py
+++ b/recommerce/market/circular/circular_vendors.py
@@ -7,7 +7,6 @@
 import pandas as pd
 from attrdict import AttrDict
 from sklearn.linear_model import LinearRegression
-from tqdm import tqdm
 
 from recommerce.configuration.path_manager import PathManager
 from recommerce.market.vendors import Agent, FixedPriceAgent, HumanPlayer, RuleBasedAgent
@@ -227,7 +226,7 @@ def create_x_with_binary_features(self, X):
 		X_dash_list = []
 		for price_threshhold in range(10):
 			# iterate throw the columns
-			for i_feature, column in tqdm(enumerate(X.T)):
+			for i_feature, column in enumerate(X.T):
 				column_values = np.where(column > price_threshhold, 1, 0)
 				# append the new column to X
 				X_dash_list.append(column_values.reshape(-1, 1))
diff --git a/recommerce/market/owner.py b/recommerce/market/owner.py
index 33f2bf19..2a425099 100644
--- a/recommerce/market/owner.py
+++ b/recommerce/market/owner.py
@@ -159,12 +159,15 @@ def generate_return_probabilities_from_offer(self, common_state, vendor_specific
 		assert len(vendor_specific_state) > 0, 'there must be at least one vendor.'
 
 		input_array = list(vendor_actions[0]) + list(vendor_actions[1])
+		print(input_array)
 		input_array = self.create_x_with_binary_features(np.array(input_array).reshape(1, -1))
 		prediction = LinearRegressionOwner.regressor.predict(input_array)[0]
+		print(prediction)
 		prediction = np.where(prediction < 0, 0, prediction)
 		prediction = prediction / np.sum(prediction)
+		print(prediction)
 		return prediction
 
 
 if __name__ == '__main__':
-	LinearRegressionOwner()
+	LinearRegressionOwner()
diff --git a/recommerce/market/sim_market.py b/recommerce/market/sim_market.py
index 64897bd2..95504ab2 100644
--- a/recommerce/market/sim_market.py
+++ b/recommerce/market/sim_market.py
@@ -88,10 +88,10 @@ def __init__(
 			]
 			purchases_pandas_state_columns = [
 				'buy nothing',
-				'buy new agent',
 				'buy refurbished agent',
-				'buy new competitor',
+				'buy new agent',
 				'buy refurbished competitor',
+				'buy new competitor',
 			]
 			owner_pandas_state_columns = [
 				'product holding',
diff --git a/recommerce/rl/training_scenario.py b/recommerce/rl/training_scenario.py
index b1b06b4e..ceb0202b 100644
--- a/recommerce/rl/training_scenario.py
+++ b/recommerce/rl/training_scenario.py
@@ -133,17 +133,17 @@ def train_stable_baselines_a2c():
 	StableBaselinesA2C(
 		config_market=config_market,
 		config_rl=config_rl,
-		marketplace=circular_market.CircularEconomyRebuyPriceDuopoly(config_market, True)).train_agent(200000)
+		marketplace=circular_market.CircularEconomyRebuyPriceDuopoly(config_market, True)).train_agent(500000)
 
 
 def train_stable_baselines_ppo():
-	used_marketplace = circular_market.CircularEconomyRebuyPriceDuopoly
+	used_marketplace = circular_market.CircularEconomyRebuyPriceDuopolyFitted
 	config_market: AttrDict = HyperparameterConfigLoader.load('market_config', used_marketplace)
 	config_rl: AttrDict = HyperparameterConfigLoader.load('sb_ppo_config', StableBaselinesPPO)
 	StableBaselinesPPO(
 		config_market=config_market,
 		config_rl=config_rl,
-		marketplace=used_marketplace(config_market, True)).train_agent(200000)
+		marketplace=used_marketplace(config_market, True)).train_agent(500000)
 
 
 def train_stable_baselines_sac():

From bd63219623bf0150bdb0ba1a5a0359cdee56fbcb Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Thu, 23 Mar 2023 02:25:37 +0100
Subject: [PATCH 11/20] all fitted markets working

---
 .../market/circular/circular_customers.py     | 40 +++++++------------
 .../market/circular/circular_sim_market.py    | 16 +++++---
 recommerce/market/owner.py                    | 37 +++++++----------
 recommerce/market/sim_market.py               | 13 +++++-
 recommerce/rl/callback.py                     |  3 +-
 .../stable_baselines_model.py                 |  4 --
 6 files changed, 52 insertions(+), 61 deletions(-)

diff --git a/recommerce/market/circular/circular_customers.py b/recommerce/market/circular/circular_customers.py
index 0e4471e6..e2916c8b 100644
--- a/recommerce/market/circular/circular_customers.py
+++ b/recommerce/market/circular/circular_customers.py
@@ -56,29 +56,13 @@ def __init__(self) -> None:
 			customers_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'customers_dataframe.xlsx'))
 			print('Dataset read')
 			X = customers_dataframe.iloc[:, 0:6].values
-			# Swap the first three columns and the last three columns. Write it to X_swapped
-			X_swapped = np.concatenate((X[:, 3:6], X[:, 0:3]), axis=1)
-			# Concatenate X and X_swapped to X
-			X = np.concatenate((X, X_swapped), axis=0)
-			X = self.create_x_with_binary_features(X)
-			Y = customers_dataframe.iloc[:, 6:11].values
-			# Swap columns 1, 2 and 3, 4. Write it to Y_swapped
-			Y_swapped = np.concatenate((Y[:, 0].reshape(-1, 1), Y[:, 1:3], Y[:, 3:5]), axis=1)
-			# Concatenate Y and Y_swapped to Y
-			Y = np.concatenate((Y, Y_swapped), axis=0)
+			# X = self.create_x_with_binary_features(X)
+			Y = customers_dataframe.iloc[:, 6:9].values
+
 			LinearRegressionCustomer.regressor = LinearRegression()
 			LinearRegressionCustomer.regressor.fit(X, Y)
 			print(f'LinearRegressionCustomer: R^2 = {self.regressor.score(X, Y)}')
 
-			# prediction = LinearRegressionCustomer.regressor.predict(X)
-			# print(f'LinearRegressionCustomer: prediction = {prediction}')
-			# customers_dataframe['buy nothing predicted'] = prediction[:, 0]
-			# customers_dataframe['buy new agent predicted'] = prediction[:, 1]
-			# customers_dataframe['buy refurbished agent predicted'] = prediction[:, 2]
-			# customers_dataframe['buy new competitor predicted'] = prediction[:, 3]
-			# customers_dataframe['buy refurbished competitor predicted'] = prediction[:, 4]
-			# customers_dataframe.to_excel(os.path.join(PathManager.data_path, 'customers_dataframe_predicted.xlsx'), index=False)
-
 	def generate_purchase_probabilities_from_offer(self, market_config, common_state, vendor_specific_state, vendor_actions) -> np.array:
 		assert isinstance(common_state, np.ndarray), 'common_state must be a np.ndarray'
 		assert isinstance(vendor_specific_state, list), 'vendor_specific_state must be a list'
@@ -87,14 +71,18 @@ def generate_purchase_probabilities_from_offer(self, market_config, common_state
 			'Both the vendor_specific_state and vendor_actions contain one element per vendor. So they must have the same length.'
 		assert len(vendor_specific_state) > 0, 'there must be at least one vendor.'
 
-		input_array = list(vendor_actions[0]) + list(vendor_actions[1])
-		input_array = self.create_x_with_binary_features(np.array(input_array).reshape(1, -1))
-		# input_array = np.concatenate((np.array(input_array).reshape(1, -1), input_binary), axis=1)
-		# print(input_array)
-		prediction = LinearRegressionCustomer.regressor.predict(input_array)[0]
+		input_array_customer = np.array(list(vendor_actions[0]) + list(vendor_actions[1])).reshape(1, -1)
+		# input_array_customer = self.create_x_with_binary_features(input_array_customer)
+		prediction_for_customer = LinearRegressionCustomer.regressor.predict(input_array_customer)[0]
+
+		input_array_competitor = np.array(list(vendor_actions[1]) + list(vendor_actions[0])).reshape(1, -1)
+		# input_array_competitor = self.create_x_with_binary_features(input_array_competitor)
+		prediction_for_competitor = LinearRegressionCustomer.regressor.predict(input_array_competitor)[0]
+
+		prediction = np.concatenate((prediction_for_customer, prediction_for_competitor[1:3]))
+
 		prediction = np.where(prediction < 0, 0, prediction)
-		prediction = prediction / np.sum(prediction)
-		# print(prediction)
+
 		return prediction
 
 
diff --git a/recommerce/market/circular/circular_sim_market.py b/recommerce/market/circular/circular_sim_market.py
index 2232cbb0..8ccb1973 100644
--- a/recommerce/market/circular/circular_sim_market.py
+++ b/recommerce/market/circular/circular_sim_market.py
@@ -130,11 +130,17 @@ def _simulate_owners(self, profits) -> None:
 		assert len(return_probabilities) == 2 + self._number_of_vendors, \
 			'the length of return_probabilities must be the number of vendors plus 2'
 
-		number_of_owners = int(self.config.share_interested_owners * self.in_circulation / self._number_of_vendors)
-		owner_decisions = np.random.multinomial(number_of_owners, return_probabilities).tolist()
+		if np.abs(np.sum(return_probabilities) - 1) < 0.001:
+			number_of_owners = int(self.config.share_interested_owners * self.in_circulation / self._number_of_vendors)
+			owner_decisions = np.random.multinomial(number_of_owners, return_probabilities).tolist()
+		else:
+			owner_decisions = [0] * len(return_probabilities)
+			for i, prediction in enumerate(return_probabilities):
+				owner_decisions[i] = np.ceil(prediction) if np.random.random() < prediction - np.floor(prediction) else np.floor(prediction)
+			owner_decisions = [int(x) for x in owner_decisions]
 
 		if self.document_for_regression:
-			new_row = self._observation(1)[2:5].tolist() + self._observation(0)[2:5].tolist() + owner_decisions
+			new_row = self._observation(0)[0:1].tolist() + self._observation(1)[2:5].tolist() + self._observation(0)[2:5].tolist() + owner_decisions
 			self.owners_dataframe.loc[len(self.owners_dataframe)] = new_row
 
 		# owner decisions can be as follows:
@@ -379,8 +385,8 @@ def _get_competitor_list(self) -> list:
 	def _choose_customer(self) -> Customer:
 		return LinearRegressionCustomer()
 
-	# def _choose_owner(self) -> Owner:
-	# 	return owner.LinearRegressionOwner()
+	def _choose_owner(self) -> Owner:
+		return owner.LinearRegressionOwner()
 
 
 class CircularEconomyRebuyPriceOligopoly(CircularEconomyRebuyPrice):
diff --git a/recommerce/market/owner.py b/recommerce/market/owner.py
index 2a425099..cfa690fd 100644
--- a/recommerce/market/owner.py
+++ b/recommerce/market/owner.py
@@ -130,26 +130,13 @@ def create_x_with_binary_features(self, X):
 	def __init__(self):
 		if not hasattr(LinearRegressionOwner, 'regressor'):
 			owner_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'owners_dataframe.xlsx'))
-			X = owner_dataframe.iloc[:, 0:6].values
-			X_swapped = np.concatenate((X[:, 3:6], X[:, 0:3]), axis=1)
-			X = np.concatenate((X, X_swapped), axis=0)
-			X = self.create_x_with_binary_features(X)
-			Y = owner_dataframe.iloc[:, 6:10].values
-			Y_swapped = np.concatenate((Y[:, 0:2], Y[:, 3].reshape(-1, 1), Y[:, 2].reshape(-1, 1)), axis=1)
-			Y = np.concatenate((Y, Y_swapped), axis=0)
+			X = owner_dataframe.iloc[:, 0:7].values
+			# X = self.create_x_with_binary_features(X)
+			Y = owner_dataframe.iloc[:, 7:10].values
 			LinearRegressionOwner.regressor = LinearRegression()
 			LinearRegressionOwner.regressor.fit(X, Y)
 			print(f'LinearRegressionOwner: R^2 = {self.regressor.score(X, Y)}')
 
-			# create a new dataframe with predictions and prediction in the column
-			# predictions = self.regressor.predict(X)
-			# owner_dataframe['predicted holding'] = predictions[:, 0]
-			# owner_dataframe['predicted throw away'] = predictions[:, 1]
-			# owner_dataframe['predicted agent rebuy'] = predictions[:, 2]
-			# owner_dataframe['predicted competitor rebuy'] = predictions[:, 3]
-			# # save the dataframe to a new excel file
-			# owner_dataframe.to_excel(os.path.join(PathManager.data_path, 'owners_dataframe_predicted.xlsx'), index=False)
-
 	def generate_return_probabilities_from_offer(self, common_state, vendor_specific_state, vendor_actions) -> np.array:
 		assert isinstance(common_state, np.ndarray), 'offers needs to be a ndarray'
 		assert isinstance(vendor_specific_state, list), 'vendor_specific_state must be a list'
@@ -158,14 +145,18 @@ def generate_return_probabilities_from_offer(self, common_state, vendor_specific
 			'Both the vendor_specific_state and vendor_actions contain one element per vendor. So they must have the same length.'
 		assert len(vendor_specific_state) > 0, 'there must be at least one vendor.'
 
-		input_array = list(vendor_actions[0]) + list(vendor_actions[1])
-		print(input_array)
-		input_array = self.create_x_with_binary_features(np.array(input_array).reshape(1, -1))
-		prediction = LinearRegressionOwner.regressor.predict(input_array)[0]
-		print(prediction)
+		input_array_customer = np.array(common_state.tolist() + list(vendor_actions[0]) + list(vendor_actions[1])).reshape(1, -1)
+		# input_array_customer = self.create_x_with_binary_features(input_array_customer)
+		prediction_for_customer = LinearRegressionOwner.regressor.predict(input_array_customer)[0]
+
+		input_array_competitor = np.array(common_state.tolist() + list(vendor_actions[1]) + list(vendor_actions[0])).reshape(1, -1)
+		# input_array_competitor = self.create_x_with_binary_features(input_array_competitor)
+		prediction_for_competitor = LinearRegressionOwner.regressor.predict(input_array_competitor)[0]
+
+		prediction = np.concatenate((prediction_for_customer, prediction_for_competitor[2:3]))
+
 		prediction = np.where(prediction < 0, 0, prediction)
-		prediction = prediction / np.sum(prediction)
-		print(prediction)
+
 		return prediction
 
 
diff --git a/recommerce/market/sim_market.py b/recommerce/market/sim_market.py
index 95504ab2..069af843 100644
--- a/recommerce/market/sim_market.py
+++ b/recommerce/market/sim_market.py
@@ -62,6 +62,7 @@ def __init__(
 			support_continuous_action_space (bool, optional): If True, the action space will be continuous. Defaults to False.
 			competitors (list, optional): If not None, this overwrites the default competitor list with a custom one.
 		"""
+		print(f'I initialize {type(self)} as market')
 		self.config = config
 		self.support_continuous_action_space = support_continuous_action_space
 		self.competitors = self._get_competitor_list() if not competitors else competitors
@@ -100,7 +101,7 @@ def __init__(
 				'rebuy competitor',
 			]
 			self.customers_dataframe = pd.DataFrame(columns=pandas_state_columns + purchases_pandas_state_columns)
-			self.owners_dataframe = pd.DataFrame(columns=pandas_state_columns + owner_pandas_state_columns)
+			self.owners_dataframe = pd.DataFrame(columns=['in circulation'] + pandas_state_columns + owner_pandas_state_columns)
 			self.competitor_reaction_dataframe = pd.DataFrame(columns=pandas_state_columns)
 
 	def _get_number_of_vendors(self) -> int:
@@ -154,7 +155,15 @@ def _simulate_customers(self, profits, number_of_customers) -> None:
 		assert isinstance(probability_distribution, np.ndarray), 'generate_purchase_probabilities_from_offer must return an np.ndarray'
 		assert self._is_probability_distribution_fitting_exactly(probability_distribution)
 
-		customer_decisions = np.random.multinomial(number_of_customers, probability_distribution).tolist()
+		if np.abs(np.sum(probability_distribution) - 1) < 0.001:
+			customer_decisions = np.random.multinomial(number_of_customers, probability_distribution).tolist()
+		else:
+			# Warning: This is not a probability distribution. This should be refactored.
+			customer_decisions = [0] * len(probability_distribution)
+			for i, prediction in enumerate(probability_distribution):
+				customer_decisions[i] = np.ceil(prediction) if np.random.random() < prediction - np.floor(prediction) else np.floor(prediction)
+			customer_decisions = [int(x) for x in customer_decisions]
+
 		if self.document_for_regression:
 			new_row = self._observation(1)[2:5].tolist() + self._observation(0)[2:5].tolist() + customer_decisions
 			self.customers_dataframe.loc[len(self.customers_dataframe)] = new_row
diff --git a/recommerce/rl/callback.py b/recommerce/rl/callback.py
index 44fd23da..977ad8bd 100644
--- a/recommerce/rl/callback.py
+++ b/recommerce/rl/callback.py
@@ -11,6 +11,7 @@
 
 import recommerce.configuration.utils as ut
 from recommerce.configuration.path_manager import PathManager
+from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly, CircularEconomyRebuyPriceDuopolyFitted
 from recommerce.market.sim_market import SimMarket
 from recommerce.market.vendors import RuleBasedAgent
 from recommerce.monitoring.agent_monitoring.am_monitoring import Monitor
@@ -176,7 +177,7 @@ def _on_training_end(self) -> None:
 			analyze_consecutive_models(
 				self.saved_parameter_paths,
 				monitor,
-				type(self.marketplace),
+				CircularEconomyRebuyPriceDuopoly if isinstance(self.marketplace, CircularEconomyRebuyPriceDuopolyFitted) else type(self.marketplace),
 				self.config_market,
 				self.agent_class,
 				hasattr(self.model, 'env'),
diff --git a/recommerce/rl/stable_baselines/stable_baselines_model.py b/recommerce/rl/stable_baselines/stable_baselines_model.py
index 246d1146..b2bd9171 100644
--- a/recommerce/rl/stable_baselines/stable_baselines_model.py
+++ b/recommerce/rl/stable_baselines/stable_baselines_model.py
@@ -58,10 +58,6 @@ def train_agent(self, training_steps=100001, iteration_length=500, analyze_after
 			type(self), self.marketplace, self.config_market, self.config_rl, training_steps=training_steps,
 			iteration_length=iteration_length, signature=self.name, analyze_after_training=analyze_after_training)
 		self.model.learn(training_steps, callback=callback)
-		self.marketplace.customers_dataframe.to_excel(os.path.join(PathManager.results_path, f'customers_dataframe_{self.name}.xlsx'))
-		self.marketplace.owners_dataframe.to_excel(os.path.join(PathManager.results_path, f'owners_dataframe_{self.name}.xlsx'))
-		self.marketplace.competitor_reaction_dataframe.to_excel(
-			os.path.join(PathManager.results_path, f'competitor_reaction_dataframe_{self.name}.xlsx'))
 		return callback.watcher
 
 	def train_with_default_eval(self, training_steps=100001):

From 5b8d84a7a85e65fc7fd44b63f05401bee2a56c21 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Fri, 24 Mar 2023 15:11:19 +0100
Subject: [PATCH 12/20] distributed ablation study

---
 recommerce/rl/ablation_study.py               | 76 ++++++++-----------
 .../stable_baselines_model.py                 |  9 ++-
 2 files changed, 37 insertions(+), 48 deletions(-)

diff --git a/recommerce/rl/ablation_study.py b/recommerce/rl/ablation_study.py
index 83c489e1..721733ed 100644
--- a/recommerce/rl/ablation_study.py
+++ b/recommerce/rl/ablation_study.py
@@ -78,7 +78,7 @@ def run_training_session(market_class, config_market, agent_class, config_rl, tr
     exampleprinter = ExamplePrinter(config_market)
     marketplace = market_class(config_market, support_continuous_action_space=True)
     exampleprinter.setup_exampleprinter(marketplace, agent)
-    profit, info_sequences = exampleprinter.run_example(save_diagrams=True)
+    profit, info_sequences = exampleprinter.run_example(save_diagrams=False)
     pipe_to_parent.send(info_sequences)
 
 
@@ -93,7 +93,7 @@ def run_group(market_configs, market_descriptions, training_steps, target_functi
         for config_market, description, (_, pipe_entry) in zip(market_configs, market_descriptions, pipes)]
     print('Now I start the processes')
     for p in processes:
-        time.sleep(10)
+        time.sleep(2)
         p.start()
     print('Now I wait for the results')
     info_sequences = [output.recv() for output, _ in pipes]
@@ -115,45 +115,33 @@ def get_different_market_configs(parameter_name, values):
 
 
 if __name__ == '__main__':
-    storage_df = run_group(*get_different_market_configs('max_storage', [20, 50, 100, 200]), training_steps=100000)
-    print(storage_df)
-    storage_df.to_excel(os.path.join(PathManager.results_path, 'storage.xlsx'), index=False)
-    production_price_df = run_group(*get_different_market_configs('production_price', [2, 3, 4]), training_steps=100000)
-    print(production_price_df)
-    production_price_df.to_excel(os.path.join(PathManager.results_path, 'production_price.xlsx'), index=False)
-    number_of_customers_df = run_group(*get_different_market_configs('number_of_customers', [10, 20, 30]), training_steps=100000)
-    print(number_of_customers_df)
-    number_of_customers_df.to_excel(os.path.join(PathManager.results_path, 'number_of_customers.xlsx'), index=False)
-    storage_cost_df = run_group(*get_different_market_configs('storage_cost', [0.01, 0.05, 0.1, 0.2]), training_steps=100000)
-    print(storage_cost_df)
-    storage_cost_df.to_excel(os.path.join(PathManager.results_path, 'storage_cost.xlsx'), index=False)
-    compared_value_old_df = run_group(*get_different_market_configs('compared_value_old', [0.4, 0.55, 0.6]), training_steps=100000)
-    print(compared_value_old_df)
-    compared_value_old_df.to_excel(os.path.join(PathManager.results_path, 'compared_value_old.xlsx'), index=False)
-    upper_tolerance_old_df = run_group(*get_different_market_configs('upper_tolerance_old', [4.0, 5.0, 6.0]), training_steps=100000)
-    print(upper_tolerance_old_df)
-    upper_tolerance_old_df.to_excel(os.path.join(PathManager.results_path, 'upper_tolerance_old.xlsx'), index=False)
-    upper_tolerance_new_df = run_group(*get_different_market_configs('upper_tolerance_new', [7.0, 8.0, 9.0]), training_steps=100000)
-    print(upper_tolerance_new_df)
-    upper_tolerance_new_df.to_excel(os.path.join(PathManager.results_path, 'upper_tolerance_new.xlsx'), index=False)
-    share_interested_owners_df = run_group(*get_different_market_configs('share_interested_owners', [0.025, 0.05, 0.075]),
-                                           training_steps=100000)
-    print(share_interested_owners_df)
-    share_interested_owners_df.to_excel(os.path.join(PathManager.results_path, 'share_interested_owners.xlsx'), index=False)
-    competitor_lowest_storage_level_df = run_group(*get_different_market_configs('competitor_lowest_storage_level', [4.5, 6.5, 8.5]),
-                                                   training_steps=100000)
-    print(competitor_lowest_storage_level_df)
-    competitor_lowest_storage_level_df.to_excel(os.path.join(PathManager.results_path, 'competitor_lowest_storage_level.xlsx'),
-                                                index=False)
-    competitor_ok_storage_level_df = run_group(*get_different_market_configs('competitor_ok_storage_level', [9.5, 12.5, 15.5]),
-                                               training_steps=100000)
-
-    # merge all dataframes
-    all_dataframes = [storage_df, production_price_df, number_of_customers_df, storage_cost_df, compared_value_old_df,
-                    upper_tolerance_old_df, upper_tolerance_new_df, share_interested_owners_df,
-                    competitor_lowest_storage_level_df, competitor_ok_storage_level_df]
-    all_dataframes = [df.set_index('market configuration') for df in all_dataframes]
-    merged_df = pd.concat(all_dataframes, axis=1)
-
-    # save merged dataframe to excel
-    merged_df.to_excel(os.path.join(PathManager.results_path, 'merged.xlsx'))
+    experiments = [('max_storage', [20, 50, 200]),
+        ('production_price', [2, 4]),
+        ('number_of_customers', [10, 30]),
+        ('storage_cost', [0.01, 0.1, 0.2]),
+        ('compared_value_old', [0.4, 0.6]),
+        ('upper_tolerance_old', [4.0, 6.0]),
+        ('upper_tolerance_new', [7.0, 9.0]),
+        ('share_interested_owners', [0.025, 0.075]),
+        ('competitor_lowest_storage_level', [4.5, 8.5]),
+        ('competitor_ok_storage_level', [9.5, 15.5])
+    ]
+    market_configs, descriptions = [], []
+    for experiment in experiments:
+        print(experiment)
+        single_configs, single_descriptions = get_different_market_configs(*experiment)
+        market_configs += single_configs
+        descriptions += single_descriptions
+
+    print(f'Now I start the experiments. There are {len(market_configs)} experiments in total.')
+    dataframes = []
+    for i in range(0, len(market_configs), 8):
+        print(f'Now I start the experiments {i}-{i+8}')
+        tmp_dataframe = run_group(market_configs[i:i+8], descriptions[i:i+8], 1000000)
+        dataframes.append(tmp_dataframe)
+        print(f'Saving dataframe {i}-{i+8}')
+        tmp_dataframe.to_excel(os.path.join(PathManager.results_path, f'dataframe{i}-{i+8}.xlsx'), index=False)
+    dataframe = pd.concat(dataframes)
+    print('Now I have the dataframe. I save it...')
+    dataframe.to_excel(os.path.join(PathManager.results_path, 'dataframe.xlsx'), index=False)
+    print('Done')
diff --git a/recommerce/rl/stable_baselines/stable_baselines_model.py b/recommerce/rl/stable_baselines/stable_baselines_model.py
index b2bd9171..309a3e9a 100644
--- a/recommerce/rl/stable_baselines/stable_baselines_model.py
+++ b/recommerce/rl/stable_baselines/stable_baselines_model.py
@@ -67,10 +67,11 @@ def train_with_default_eval(self, training_steps=100001):
 		callback = EvalCallback(Monitor(self.marketplace, filename=log_path),
 			best_model_save_path=save_path, log_path=log_path, render=False)
 		self.model.learn(training_steps, callback=callback)
-		self.marketplace.customers_dataframe.to_excel(os.path.join(PathManager.results_path, f'customers_dataframe_{self.name}.xlsx'))
-		self.marketplace.owners_dataframe.to_excel(os.path.join(PathManager.results_path, f'owners_dataframe_{self.name}.xlsx'))
-		self.marketplace.competitor_reaction_dataframe.to_excel(
-			os.path.join(PathManager.results_path, f'competitor_reaction_dataframe_{self.name}.xlsx'))
+		if self.marketplace.document_for_regression:
+			self.marketplace.customers_dataframe.to_excel(os.path.join(PathManager.results_path, f'customers_dataframe_{self.name}.xlsx'))
+			self.marketplace.owners_dataframe.to_excel(os.path.join(PathManager.results_path, f'owners_dataframe_{self.name}.xlsx'))
+			self.marketplace.competitor_reaction_dataframe.to_excel(
+				os.path.join(PathManager.results_path, f'competitor_reaction_dataframe_{self.name}.xlsx'))
 		return save_path
 
 	@staticmethod

From 7476e0b3f04ecc6ae52856dc818e1f1c62c87658 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Sat, 1 Apr 2023 19:27:53 +0200
Subject: [PATCH 13/20] Add RuleBasedCERebuyAgentSSCurve vendor

---
 .../market/circular/circular_vendors.py       | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/recommerce/market/circular/circular_vendors.py b/recommerce/market/circular/circular_vendors.py
index cc4c548c..477d403e 100644
--- a/recommerce/market/circular/circular_vendors.py
+++ b/recommerce/market/circular/circular_vendors.py
@@ -218,6 +218,38 @@ def policy(self, observation, *_) -> tuple:
 			if random.random() < 0.8 else (random.randint(0, 10), random.randint(0, 10), random.randint(0, 10)))
 
 
+class RuleBasedCERebuyAgentSSCurve(RuleBasedAgent, CircularAgent):
+	"""
+	This vendor's policy is aiming to succeed by undercutting the competitor's prices.
+	"""
+	def __init__(self, config_market: AttrDict, name='', continuous_action_space: bool = False):
+		self.continuous_action_space = continuous_action_space
+		self.name = name if name != '' else type(self).__name__
+		self.config_market = config_market
+
+	def policy(self, observation, *_) -> tuple:
+		lower_bound_new = 4
+		upper_bound_new = 9
+		lower_bound_refurbished = 1
+		upper_bound_refurbished = 7
+		step_size = 1
+		competitors_refurbished_prices, competitors_new_prices, competitors_rebuy_prices = self._get_competitor_prices(observation, True)
+
+		new_price = upper_bound_new if competitors_new_prices < lower_bound_new else competitors_new_prices - step_size
+		refurbished_price = upper_bound_refurbished if competitors_refurbished_prices < lower_bound_refurbished else \
+			competitors_refurbished_prices - step_size
+
+		own_storage = observation[1].item() if self.config_market.common_state_visibility else observation[0].item()
+		if own_storage < self.config_market.competitor_lowest_storage_level:
+			rebuy_price = max(competitors_rebuy_prices + 1, 2)
+		elif own_storage < self.config_market.competitor_ok_storage_level:
+			rebuy_price = max(min(competitors_rebuy_prices, 0.25), 2)
+		else:
+			rebuy_price = max(min(competitors_rebuy_prices - 1), 2)
+
+		return np.array((self._clamp_price(refurbished_price), self._clamp_price(new_price), self._clamp_price(rebuy_price)))
+
+
 class LinearRegressionCERebuyAgent(RuleBasedAgent, CircularAgent):
 	"""
 	This vendor's policy is aiming to succeed by undercutting the competitor's prices.

From e03be76d55eda72699107aa5ef51471e868b444c Mon Sep 17 00:00:00 2001
From: Jan Groeneveld <jan.groeneveld@student.hpi.de>
Date: Sun, 2 Apr 2023 00:51:40 +0200
Subject: [PATCH 14/20] small changes

---
 .../market/circular/circular_customers.py     |  2 +-
 .../market/circular/circular_vendors.py       | 19 ++++++++++---------
 recommerce/market/owner.py                    |  2 +-
 recommerce/market/samples_generation.py       | 10 +++++-----
 4 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/recommerce/market/circular/circular_customers.py b/recommerce/market/circular/circular_customers.py
index e2916c8b..f811371e 100644
--- a/recommerce/market/circular/circular_customers.py
+++ b/recommerce/market/circular/circular_customers.py
@@ -53,7 +53,7 @@ def create_x_with_binary_features(self, X):
 
 	def __init__(self) -> None:
 		if not hasattr(LinearRegressionCustomer, 'regressor'):
-			customers_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'customers_dataframe.xlsx'))
+			customers_dataframe = pd.read_excel(os.path.join(PathManager.results_path, 'customers_dataframe.xlsx'))
 			print('Dataset read')
 			X = customers_dataframe.iloc[:, 0:6].values
 			# X = self.create_x_with_binary_features(X)
diff --git a/recommerce/market/circular/circular_vendors.py b/recommerce/market/circular/circular_vendors.py
index 477d403e..e864056a 100644
--- a/recommerce/market/circular/circular_vendors.py
+++ b/recommerce/market/circular/circular_vendors.py
@@ -235,19 +235,20 @@ def policy(self, observation, *_) -> tuple:
 		step_size = 1
 		competitors_refurbished_prices, competitors_new_prices, competitors_rebuy_prices = self._get_competitor_prices(observation, True)
 
-		new_price = upper_bound_new if competitors_new_prices < lower_bound_new else competitors_new_prices - step_size
-		refurbished_price = upper_bound_refurbished if competitors_refurbished_prices < lower_bound_refurbished else \
-			competitors_refurbished_prices - step_size
+		new_price = upper_bound_new if competitors_new_prices[0] < lower_bound_new else competitors_new_prices[0] - step_size
+		refurbished_price = upper_bound_refurbished if competitors_refurbished_prices[0] < lower_bound_refurbished else \
+			competitors_refurbished_prices[0] - step_size
 
 		own_storage = observation[1].item() if self.config_market.common_state_visibility else observation[0].item()
 		if own_storage < self.config_market.competitor_lowest_storage_level:
-			rebuy_price = max(competitors_rebuy_prices + 1, 2)
+			rebuy_price = max(min(competitors_rebuy_prices) + 1, 2)
 		elif own_storage < self.config_market.competitor_ok_storage_level:
-			rebuy_price = max(min(competitors_rebuy_prices, 0.25), 2)
+			rebuy_price = max(min(competitors_rebuy_prices), 2)
 		else:
-			rebuy_price = max(min(competitors_rebuy_prices - 1), 2)
+			rebuy_price = max(min(competitors_rebuy_prices) - 1, 2)
 
-		return np.array((self._clamp_price(refurbished_price), self._clamp_price(new_price), self._clamp_price(rebuy_price)))
+		return np.array((self._clamp_price(refurbished_price), self._clamp_price(new_price), self._clamp_price(rebuy_price))
+			if random.random() < 0.8 else (random.randint(0, 10), random.randint(0, 10), random.randint(0, 10)))
 
 
 class LinearRegressionCERebuyAgent(RuleBasedAgent, CircularAgent):
@@ -269,7 +270,7 @@ def __init__(self, config_market: AttrDict, name='', continuous_action_space: bo
 		self.continuous_action_space = continuous_action_space
 		self.name = name if name != '' else type(self).__name__
 		if not hasattr(LinearRegressionCERebuyAgent, 'regressor'):
-			competitor_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'competitor_reaction_dataframe.xlsx'))[:-5000]
+			competitor_dataframe = pd.read_excel(os.path.join(PathManager.results_path, 'competitor_reaction_dataframe.xlsx'))[:-5000]
 			X = competitor_dataframe.iloc[:, 0:3].values
 
 			X = self.create_x_with_binary_features(X)
@@ -285,7 +286,7 @@ def __init__(self, config_market: AttrDict, name='', continuous_action_space: bo
 			# competitor_dataframe['predicted_refurbished_price'] = predictions[:, 0]
 			# competitor_dataframe['predicted_new_price'] = predictions[:, 1]
 			# competitor_dataframe['predicted_rebuy_price'] = predictions[:, 2]
-			# competitor_dataframe.to_excel(os.path.join(PathManager.data_path, 'competitor_reaction_dataframe_predicted.xlsx'), index=False)
+			# competitor_dataframe.to_excel(os.path.join(PathManager.results_path, 'competitor_reaction_dataframe_predicted.xlsx'), index=False)
 
 	def policy(self, observation, *_) -> tuple:
 		assert isinstance(observation, np.ndarray), 'observation must be a np.ndarray'
diff --git a/recommerce/market/owner.py b/recommerce/market/owner.py
index cfa690fd..381aa1bf 100644
--- a/recommerce/market/owner.py
+++ b/recommerce/market/owner.py
@@ -129,7 +129,7 @@ def create_x_with_binary_features(self, X):
 
 	def __init__(self):
 		if not hasattr(LinearRegressionOwner, 'regressor'):
-			owner_dataframe = pd.read_excel(os.path.join(PathManager.data_path, 'owners_dataframe.xlsx'))
+			owner_dataframe = pd.read_excel(os.path.join(PathManager.results_path, 'owners_dataframe.xlsx'))
 			X = owner_dataframe.iloc[:, 0:7].values
 			# X = self.create_x_with_binary_features(X)
 			Y = owner_dataframe.iloc[:, 7:10].values
diff --git a/recommerce/market/samples_generation.py b/recommerce/market/samples_generation.py
index 3eb06952..00bbc893 100644
--- a/recommerce/market/samples_generation.py
+++ b/recommerce/market/samples_generation.py
@@ -5,21 +5,21 @@
 from recommerce.configuration.hyperparameter_config import HyperparameterConfigLoader
 from recommerce.configuration.path_manager import PathManager
 from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly
-from recommerce.market.circular.circular_vendors import RuleBasedCERebuyAgentSampleCollector
+from recommerce.market.circular.circular_vendors import RuleBasedCERebuyAgentSSCurve
 from recommerce.monitoring.exampleprinter import ExamplePrinter
 
 if __name__ == '__main__':
 	config_market = HyperparameterConfigLoader.load('market_config', CircularEconomyRebuyPriceDuopoly)
 	exampleprinter = ExamplePrinter(config_market)
-	agent = RuleBasedCERebuyAgentSampleCollector(config_market, 'Sample Collector', True)
+	agent = RuleBasedCERebuyAgentSSCurve(config_market, 'Sample Collector', True)
 	marketplace = CircularEconomyRebuyPriceDuopoly(config_market, True, document_for_regression=True)
 	exampleprinter.setup_exampleprinter(marketplace, agent)
 	for _ in tqdm(range(20)):
 		exampleprinter.run_example(False)
 	print('Saving customers dataframe...')
-	marketplace.customers_dataframe.to_excel(os.path.join(PathManager.results_path, 'customers_dataframe_generated.xlsx'), index=False)
+	marketplace.customers_dataframe.to_excel(os.path.join(PathManager.results_path, 'customers_dataframe.xlsx'), index=False)
 	print('Saving owners dataframe...')
-	marketplace.owners_dataframe.to_excel(os.path.join(PathManager.results_path, 'owners_dataframe_generated.xlsx'), index=False)
+	marketplace.owners_dataframe.to_excel(os.path.join(PathManager.results_path, 'owners_dataframe.xlsx'), index=False)
 	print('Saving reaction dataframe...')
 	marketplace.competitor_reaction_dataframe.to_excel(
-		os.path.join(PathManager.results_path, 'competitor_reaction_dataframe_generated.xlsx'), index=False)
+		os.path.join(PathManager.results_path, 'competitor_reaction_dataframe.xlsx'), index=False)

From a714263ce0c74c13ad9dab748c0def268c785490 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Sun, 2 Apr 2023 13:46:23 +0200
Subject: [PATCH 15/20] train changes

---
 .../stable_baselines_model.py                 | 27 +++++++++++++++----
 recommerce/rl/training_scenario.py            |  2 +-
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/recommerce/rl/stable_baselines/stable_baselines_model.py b/recommerce/rl/stable_baselines/stable_baselines_model.py
index 309a3e9a..1f6b8efa 100644
--- a/recommerce/rl/stable_baselines/stable_baselines_model.py
+++ b/recommerce/rl/stable_baselines/stable_baselines_model.py
@@ -1,15 +1,17 @@
 import os
+import time
 from abc import ABC, abstractmethod
 
 import numpy as np
 from attrdict import AttrDict
-from stable_baselines3.common.callbacks import EvalCallback
-from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.callbacks import CheckpointCallback
 
 from recommerce.configuration.path_manager import PathManager
+from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly
 from recommerce.market.circular.circular_vendors import CircularAgent
 from recommerce.market.linear.linear_vendors import LinearAgent
 from recommerce.market.sim_market import SimMarket
+from recommerce.monitoring.exampleprinter import ExamplePrinter
 from recommerce.rl.callback import RecommerceCallback
 from recommerce.rl.reinforcement_learning_agent import ReinforcementLearningAgent
 
@@ -61,17 +63,32 @@ def train_agent(self, training_steps=100001, iteration_length=500, analyze_after
 		return callback.watcher
 
 	def train_with_default_eval(self, training_steps=100001):
-		save_path = os.path.join(PathManager.results_path, 'best_model', f'{self.name}')
+		save_path = os.path.join(PathManager.results_path, f'model_files_{time.strftime("%b%d_%H-%M-%S")}', f'{self.name}')
 		log_path = os.path.join(PathManager.results_path, 'logs', f'{self.name}')
 		os.makedirs(log_path, exist_ok=True)
-		callback = EvalCallback(Monitor(self.marketplace, filename=log_path),
-			best_model_save_path=save_path, log_path=log_path, render=False)
+		step_size = 25000
+		callback = CheckpointCallback(step_size, save_path=save_path)
 		self.model.learn(training_steps, callback=callback)
 		if self.marketplace.document_for_regression:
 			self.marketplace.customers_dataframe.to_excel(os.path.join(PathManager.results_path, f'customers_dataframe_{self.name}.xlsx'))
 			self.marketplace.owners_dataframe.to_excel(os.path.join(PathManager.results_path, f'owners_dataframe_{self.name}.xlsx'))
 			self.marketplace.competitor_reaction_dataframe.to_excel(
 				os.path.join(PathManager.results_path, f'competitor_reaction_dataframe_{self.name}.xlsx'))
+
+		best_profit = -np.inf
+		# iterate through the saved models and evaluate them by running the exampleprinter
+		for model_file in os.listdir(save_path):
+			agent = type(self)(self.config_market, self.config_rl, self.marketplace, load_path=os.path.join(save_path, model_file))
+			exampleprinter = ExamplePrinter(self.config_market)
+			marketplace = CircularEconomyRebuyPriceDuopoly(self.config_market, support_continuous_action_space=True)
+			exampleprinter.setup_exampleprinter(marketplace, agent)
+			_, info_sequence = exampleprinter.run_example()
+			profit = np.mean(info_sequence['profits/all/vendor_0'])
+			print(f'profit per step of {model_file}: {profit}')
+			if profit > best_profit:
+				best_profit = profit
+				best_model = model_file
+		print(f'best model: {best_model} with profit {best_profit}')
 		return save_path
 
 	@staticmethod
diff --git a/recommerce/rl/training_scenario.py b/recommerce/rl/training_scenario.py
index ceb0202b..8c36546a 100644
--- a/recommerce/rl/training_scenario.py
+++ b/recommerce/rl/training_scenario.py
@@ -143,7 +143,7 @@ def train_stable_baselines_ppo():
 	StableBaselinesPPO(
 		config_market=config_market,
 		config_rl=config_rl,
-		marketplace=used_marketplace(config_market, True)).train_agent(500000)
+		marketplace=used_marketplace(config_market, True)).train_with_default_eval(50000)
 
 
 def train_stable_baselines_sac():

From 1d0c84929aeb927f3b3da9462f5da52a3a8d7336 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Mon, 3 Apr 2023 01:36:16 +0200
Subject: [PATCH 16/20] small changes

---
 recommerce/rl/ablation_study.py    | 11 ++++++-----
 recommerce/rl/training_scenario.py |  4 ++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/recommerce/rl/ablation_study.py b/recommerce/rl/ablation_study.py
index 721733ed..1f5ce969 100644
--- a/recommerce/rl/ablation_study.py
+++ b/recommerce/rl/ablation_study.py
@@ -135,12 +135,13 @@ def get_different_market_configs(parameter_name, values):
 
     print(f'Now I start the experiments. There are {len(market_configs)} experiments in total.')
     dataframes = []
-    for i in range(0, len(market_configs), 8):
-        print(f'Now I start the experiments {i}-{i+8}')
-        tmp_dataframe = run_group(market_configs[i:i+8], descriptions[i:i+8], 1000000)
+    parallel_runs = 4
+    for i in range(0, len(market_configs), parallel_runs):
+        print(f'Now I start the experiments {i}-{i+parallel_runs}')
+        tmp_dataframe = run_group(market_configs[i:i+parallel_runs], descriptions[i:i+parallel_runs], 1000000)
         dataframes.append(tmp_dataframe)
-        print(f'Saving dataframe {i}-{i+8}')
-        tmp_dataframe.to_excel(os.path.join(PathManager.results_path, f'dataframe{i}-{i+8}.xlsx'), index=False)
+        print(f'Saving dataframe {i}-{i+parallel_runs}')
+        tmp_dataframe.to_excel(os.path.join(PathManager.results_path, f'dataframe{i}-{i+parallel_runs}.xlsx'), index=False)
     dataframe = pd.concat(dataframes)
     print('Now I have the dataframe. I save it...')
     dataframe.to_excel(os.path.join(PathManager.results_path, 'dataframe.xlsx'), index=False)
diff --git a/recommerce/rl/training_scenario.py b/recommerce/rl/training_scenario.py
index 8c36546a..f84aee90 100644
--- a/recommerce/rl/training_scenario.py
+++ b/recommerce/rl/training_scenario.py
@@ -137,13 +137,13 @@ def train_stable_baselines_a2c():
 
 
 def train_stable_baselines_ppo():
-	used_marketplace = circular_market.CircularEconomyRebuyPriceDuopolyFitted
+	used_marketplace = circular_market.CircularEconomyRebuyPriceDuopoly
 	config_market: AttrDict = HyperparameterConfigLoader.load('market_config', used_marketplace)
 	config_rl: AttrDict = HyperparameterConfigLoader.load('sb_ppo_config', StableBaselinesPPO)
 	StableBaselinesPPO(
 		config_market=config_market,
 		config_rl=config_rl,
-		marketplace=used_marketplace(config_market, True)).train_with_default_eval(50000)
+		marketplace=used_marketplace(config_market, True)).train_with_default_eval(1000000)
 
 
 def train_stable_baselines_sac():

From 636aa185687976728fc8c943b2d0011e5fa9778e Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Tue, 11 Apr 2023 10:47:16 +0200
Subject: [PATCH 17/20] small changes

---
 recommerce/market/samples_generation.py | 4 ++--
 recommerce/monitoring/exampleprinter.py | 2 +-
 recommerce/rl/training_scenario.py      | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/recommerce/market/samples_generation.py b/recommerce/market/samples_generation.py
index 00bbc893..00d1df7c 100644
--- a/recommerce/market/samples_generation.py
+++ b/recommerce/market/samples_generation.py
@@ -5,13 +5,13 @@
 from recommerce.configuration.hyperparameter_config import HyperparameterConfigLoader
 from recommerce.configuration.path_manager import PathManager
 from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly
-from recommerce.market.circular.circular_vendors import RuleBasedCERebuyAgentSSCurve
+from recommerce.market.circular.circular_vendors import RuleBasedCERebuyAgentSampleCollector
 from recommerce.monitoring.exampleprinter import ExamplePrinter
 
 if __name__ == '__main__':
 	config_market = HyperparameterConfigLoader.load('market_config', CircularEconomyRebuyPriceDuopoly)
 	exampleprinter = ExamplePrinter(config_market)
-	agent = RuleBasedCERebuyAgentSSCurve(config_market, 'Sample Collector', True)
+	agent = RuleBasedCERebuyAgentSampleCollector(config_market, 'Sample Collector', True)
 	marketplace = CircularEconomyRebuyPriceDuopoly(config_market, True, document_for_regression=True)
 	exampleprinter.setup_exampleprinter(marketplace, agent)
 	for _ in tqdm(range(20)):
diff --git a/recommerce/monitoring/exampleprinter.py b/recommerce/monitoring/exampleprinter.py
index fcdd80db..f5dbf789 100644
--- a/recommerce/monitoring/exampleprinter.py
+++ b/recommerce/monitoring/exampleprinter.py
@@ -93,7 +93,7 @@ def run_example(self, save_diagrams=True, evaluation_left_bound=450, evaluation_
 
 		signature = f'exampleprinter_{time.strftime("%b%d_%H-%M-%S")}'
 		writer = SummaryWriter(log_dir=os.path.join(PathManager.results_path, 'runs', signature))
-		os.makedirs(os.path.join(PathManager.results_path, 'exampleprinter', signature))
+		os.makedirs(os.path.join(PathManager.results_path, 'exampleprinter', signature), exist_ok=True)
 
 		if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly) and save_diagrams:
 			svg_manipulator = SVGManipulator(signature)
diff --git a/recommerce/rl/training_scenario.py b/recommerce/rl/training_scenario.py
index f84aee90..f99abff5 100644
--- a/recommerce/rl/training_scenario.py
+++ b/recommerce/rl/training_scenario.py
@@ -137,7 +137,7 @@ def train_stable_baselines_a2c():
 
 
 def train_stable_baselines_ppo():
-	used_marketplace = circular_market.CircularEconomyRebuyPriceDuopoly
+	used_marketplace = circular_market.CircularEconomyRebuyPriceDuopolyFitted
 	config_market: AttrDict = HyperparameterConfigLoader.load('market_config', used_marketplace)
 	config_rl: AttrDict = HyperparameterConfigLoader.load('sb_ppo_config', StableBaselinesPPO)
 	StableBaselinesPPO(

From cf8be41f32d9f114aaf1f909526812b8f41cb87e Mon Sep 17 00:00:00 2001
From: Jan Groeneveld <jan.groeneveld@student.hpi.de>
Date: Tue, 11 Apr 2023 22:10:32 +0200
Subject: [PATCH 18/20] last changes desktop

---
 .../market/circular/circular_customers.py     |  1 -
 .../market/circular/circular_sim_market.py    |  4 +--
 recommerce/market/samples_generation.py       |  4 +--
 recommerce/market/sim_market.py               |  2 +-
 .../stable_baselines_model.py                 | 27 ++++++++++++++++---
 5 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/recommerce/market/circular/circular_customers.py b/recommerce/market/circular/circular_customers.py
index f811371e..dac2899a 100644
--- a/recommerce/market/circular/circular_customers.py
+++ b/recommerce/market/circular/circular_customers.py
@@ -54,7 +54,6 @@ def create_x_with_binary_features(self, X):
 	def __init__(self) -> None:
 		if not hasattr(LinearRegressionCustomer, 'regressor'):
 			customers_dataframe = pd.read_excel(os.path.join(PathManager.results_path, 'customers_dataframe.xlsx'))
-			print('Dataset read')
 			X = customers_dataframe.iloc[:, 0:6].values
 			# X = self.create_x_with_binary_features(X)
 			Y = customers_dataframe.iloc[:, 6:9].values
diff --git a/recommerce/market/circular/circular_sim_market.py b/recommerce/market/circular/circular_sim_market.py
index 8ccb1973..248bcb55 100644
--- a/recommerce/market/circular/circular_sim_market.py
+++ b/recommerce/market/circular/circular_sim_market.py
@@ -130,7 +130,7 @@ def _simulate_owners(self, profits) -> None:
 		assert len(return_probabilities) == 2 + self._number_of_vendors, \
 			'the length of return_probabilities must be the number of vendors plus 2'
 
-		if np.abs(np.sum(return_probabilities) - 1) < 0.001:
+		if np.abs(np.sum(return_probabilities) - 1) < 0.0001:
 			number_of_owners = int(self.config.share_interested_owners * self.in_circulation / self._number_of_vendors)
 			owner_decisions = np.random.multinomial(number_of_owners, return_probabilities).tolist()
 		else:
@@ -379,7 +379,7 @@ def get_num_competitors() -> int:
 		return 1
 
 	def _get_competitor_list(self) -> list:
-		return [circular_vendors.RuleBasedCERebuyAgentCompetitive(config_market=self.config,
+		return [circular_vendors.LinearRegressionCERebuyAgent(config_market=self.config,
 			continuous_action_space=self.support_continuous_action_space)]
 
 	def _choose_customer(self) -> Customer:
diff --git a/recommerce/market/samples_generation.py b/recommerce/market/samples_generation.py
index 00d1df7c..00bbc893 100644
--- a/recommerce/market/samples_generation.py
+++ b/recommerce/market/samples_generation.py
@@ -5,13 +5,13 @@
 from recommerce.configuration.hyperparameter_config import HyperparameterConfigLoader
 from recommerce.configuration.path_manager import PathManager
 from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly
-from recommerce.market.circular.circular_vendors import RuleBasedCERebuyAgentSampleCollector
+from recommerce.market.circular.circular_vendors import RuleBasedCERebuyAgentSSCurve
 from recommerce.monitoring.exampleprinter import ExamplePrinter
 
 if __name__ == '__main__':
 	config_market = HyperparameterConfigLoader.load('market_config', CircularEconomyRebuyPriceDuopoly)
 	exampleprinter = ExamplePrinter(config_market)
-	agent = RuleBasedCERebuyAgentSampleCollector(config_market, 'Sample Collector', True)
+	agent = RuleBasedCERebuyAgentSSCurve(config_market, 'Sample Collector', True)
 	marketplace = CircularEconomyRebuyPriceDuopoly(config_market, True, document_for_regression=True)
 	exampleprinter.setup_exampleprinter(marketplace, agent)
 	for _ in tqdm(range(20)):
diff --git a/recommerce/market/sim_market.py b/recommerce/market/sim_market.py
index 069af843..dc9b7b2f 100644
--- a/recommerce/market/sim_market.py
+++ b/recommerce/market/sim_market.py
@@ -155,7 +155,7 @@ def _simulate_customers(self, profits, number_of_customers) -> None:
 		assert isinstance(probability_distribution, np.ndarray), 'generate_purchase_probabilities_from_offer must return an np.ndarray'
 		assert self._is_probability_distribution_fitting_exactly(probability_distribution)
 
-		if np.abs(np.sum(probability_distribution) - 1) < 0.001:
+		if np.abs(np.sum(probability_distribution) - 1) < 0.0001:
 			customer_decisions = np.random.multinomial(number_of_customers, probability_distribution).tolist()
 		else:
 			# Warning: This is not a probability distribution. This should be refactored.
diff --git a/recommerce/rl/stable_baselines/stable_baselines_model.py b/recommerce/rl/stable_baselines/stable_baselines_model.py
index 1f6b8efa..3e6417f1 100644
--- a/recommerce/rl/stable_baselines/stable_baselines_model.py
+++ b/recommerce/rl/stable_baselines/stable_baselines_model.py
@@ -3,11 +3,12 @@
 from abc import ABC, abstractmethod
 
 import numpy as np
+import pandas as pd
 from attrdict import AttrDict
 from stable_baselines3.common.callbacks import CheckpointCallback
 
 from recommerce.configuration.path_manager import PathManager
-from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly
+from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly, CircularEconomyRebuyPriceDuopolyFitted
 from recommerce.market.circular.circular_vendors import CircularAgent
 from recommerce.market.linear.linear_vendors import LinearAgent
 from recommerce.market.sim_market import SimMarket
@@ -63,8 +64,9 @@ def train_agent(self, training_steps=100001, iteration_length=500, analyze_after
 		return callback.watcher
 
 	def train_with_default_eval(self, training_steps=100001):
-		save_path = os.path.join(PathManager.results_path, f'model_files_{time.strftime("%b%d_%H-%M-%S")}', f'{self.name}')
-		log_path = os.path.join(PathManager.results_path, 'logs', f'{self.name}')
+		token = time.strftime('%b%d_%H-%M-%S')
+		save_path = os.path.join(PathManager.results_path, f'model_files_{token}', f'{self.name}')
+		log_path = os.path.join(PathManager.results_path, 'logs', f'{token}')
 		os.makedirs(log_path, exist_ok=True)
 		step_size = 25000
 		callback = CheckpointCallback(step_size, save_path=save_path)
@@ -76,19 +78,36 @@ def train_with_default_eval(self, training_steps=100001):
 				os.path.join(PathManager.results_path, f'competitor_reaction_dataframe_{self.name}.xlsx'))
 
 		best_profit = -np.inf
+		profits = []
+		fitted_profits = []
 		# iterate through the saved models and evaluate them by running the exampleprinter
-		for model_file in os.listdir(save_path):
+		modelfiles = sorted(os.listdir(save_path))
+		for model_file in modelfiles:
+			print('I analyze the model: ', model_file)
 			agent = type(self)(self.config_market, self.config_rl, self.marketplace, load_path=os.path.join(save_path, model_file))
 			exampleprinter = ExamplePrinter(self.config_market)
 			marketplace = CircularEconomyRebuyPriceDuopoly(self.config_market, support_continuous_action_space=True)
 			exampleprinter.setup_exampleprinter(marketplace, agent)
 			_, info_sequence = exampleprinter.run_example()
 			profit = np.mean(info_sequence['profits/all/vendor_0'])
+			profits.append(profit)
 			print(f'profit per step of {model_file}: {profit}')
 			if profit > best_profit:
 				best_profit = profit
 				best_model = model_file
+
+			# evaluate on the fitted market
+			exampleprinter_fitted = ExamplePrinter(self.config_market)
+			marketplace = CircularEconomyRebuyPriceDuopolyFitted(self.config_market, support_continuous_action_space=True)
+			exampleprinter_fitted.setup_exampleprinter(marketplace, agent)
+			_, info_sequence = exampleprinter_fitted.run_example()
+			profit = np.mean(info_sequence['profits/all/vendor_0'])
+			fitted_profits.append(profit)
 		print(f'best model: {best_model} with profit {best_profit}')
+		print('Saving the results of the evaluation in the following path: ', log_path)
+		dataframe = pd.DataFrame.from_dict({'model': modelfiles, 'profit': profits, 'fitted_profit': fitted_profits})
+		dataframe.to_excel(os.path.join(log_path, f'evaluation_{time.strftime("%b%d_%H-%M-%S")}.xlsx'))
+		print('Done!')
 		return save_path
 
 	@staticmethod

From df0f3945bb38dbeeb3c9d9931ea4f64149079c89 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Sun, 14 May 2023 10:53:28 -0700
Subject: [PATCH 19/20] small changes

---
 .../configuration_files/market_config.json    |  3 +-
 .../market/circular/circular_vendors.py       |  7 ++--
 recommerce/market/samples_generation.py       |  4 +-
 recommerce/monitoring/exampleprinter.py       |  2 +-
 recommerce/rl/ablation_study.py               | 42 ++++++++++++-------
 .../configuration_files/market_config.json    |  3 +-
 6 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/recommerce/default_data/configuration_files/market_config.json b/recommerce/default_data/configuration_files/market_config.json
index 56d0a1ed..c8e4c5fc 100644
--- a/recommerce/default_data/configuration_files/market_config.json
+++ b/recommerce/default_data/configuration_files/market_config.json
@@ -15,5 +15,6 @@
     "upper_tolerance_new": 8.0,
     "share_interested_owners": 0.05,
     "competitor_lowest_storage_level": 6.5,
-    "competitor_ok_storage_level": 12.5
+    "competitor_ok_storage_level": 12.5,
+    "price_step_size": 1.0
 }
diff --git a/recommerce/market/circular/circular_vendors.py b/recommerce/market/circular/circular_vendors.py
index e864056a..0ba53717 100644
--- a/recommerce/market/circular/circular_vendors.py
+++ b/recommerce/market/circular/circular_vendors.py
@@ -198,7 +198,7 @@ def policy(self, observation, *_) -> tuple:
 		own_storage = observation[1].item() if self.config_market.common_state_visibility else observation[0].item()
 		competitors_refurbished_prices, competitors_new_prices, competitors_rebuy_prices = self._get_competitor_prices(observation, True)
 
-		price_new = max(min(competitors_new_prices) - 1, self.config_market.production_price + 1)
+		price_new = max(min(competitors_new_prices) - self.config_market.price_step_size, self.config_market.production_price + 1)
 		# competitor's storage is ignored
 		if own_storage < self.config_market.competitor_lowest_storage_level + random.randint(-3, 3):
 			# fill up the storage immediately
@@ -232,7 +232,7 @@ def policy(self, observation, *_) -> tuple:
 		upper_bound_new = 9
 		lower_bound_refurbished = 1
 		upper_bound_refurbished = 7
-		step_size = 1
+		step_size = self.config_market.price_step_size
 		competitors_refurbished_prices, competitors_new_prices, competitors_rebuy_prices = self._get_competitor_prices(observation, True)
 
 		new_price = upper_bound_new if competitors_new_prices[0] < lower_bound_new else competitors_new_prices[0] - step_size
@@ -247,8 +247,7 @@ def policy(self, observation, *_) -> tuple:
 		else:
 			rebuy_price = max(min(competitors_rebuy_prices) - 1, 2)
 
-		return np.array((self._clamp_price(refurbished_price), self._clamp_price(new_price), self._clamp_price(rebuy_price))
-			if random.random() < 0.8 else (random.randint(0, 10), random.randint(0, 10), random.randint(0, 10)))
+		return np.array((self._clamp_price(refurbished_price), self._clamp_price(new_price), self._clamp_price(rebuy_price)))
 
 
 class LinearRegressionCERebuyAgent(RuleBasedAgent, CircularAgent):
diff --git a/recommerce/market/samples_generation.py b/recommerce/market/samples_generation.py
index 00bbc893..00d1df7c 100644
--- a/recommerce/market/samples_generation.py
+++ b/recommerce/market/samples_generation.py
@@ -5,13 +5,13 @@
 from recommerce.configuration.hyperparameter_config import HyperparameterConfigLoader
 from recommerce.configuration.path_manager import PathManager
 from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly
-from recommerce.market.circular.circular_vendors import RuleBasedCERebuyAgentSSCurve
+from recommerce.market.circular.circular_vendors import RuleBasedCERebuyAgentSampleCollector
 from recommerce.monitoring.exampleprinter import ExamplePrinter
 
 if __name__ == '__main__':
 	config_market = HyperparameterConfigLoader.load('market_config', CircularEconomyRebuyPriceDuopoly)
 	exampleprinter = ExamplePrinter(config_market)
-	agent = RuleBasedCERebuyAgentSSCurve(config_market, 'Sample Collector', True)
+	agent = RuleBasedCERebuyAgentSampleCollector(config_market, 'Sample Collector', True)
 	marketplace = CircularEconomyRebuyPriceDuopoly(config_market, True, document_for_regression=True)
 	exampleprinter.setup_exampleprinter(marketplace, agent)
 	for _ in tqdm(range(20)):
diff --git a/recommerce/monitoring/exampleprinter.py b/recommerce/monitoring/exampleprinter.py
index f5dbf789..e8e30a4a 100644
--- a/recommerce/monitoring/exampleprinter.py
+++ b/recommerce/monitoring/exampleprinter.py
@@ -75,7 +75,7 @@ def _rearrange_info_dicts(self, info_dicts: list, evaluation_left_bound, evaluat
 				info_dict[key].append(info[key])
 		return info_dict
 
-	def run_example(self, save_diagrams=True, evaluation_left_bound=450, evaluation_right_bound=500) -> int:
+	def run_example(self, save_diagrams=False, evaluation_left_bound=450, evaluation_right_bound=500) -> int:
 		"""
 		Run a specified marketplace with a (pre-trained, if RL) agent and record various statistics using TensorBoard.
 
diff --git a/recommerce/rl/ablation_study.py b/recommerce/rl/ablation_study.py
index 1f5ce969..0a1ef477 100644
--- a/recommerce/rl/ablation_study.py
+++ b/recommerce/rl/ablation_study.py
@@ -10,7 +10,7 @@
 
 from recommerce.configuration.hyperparameter_config import HyperparameterConfigLoader
 from recommerce.configuration.path_manager import PathManager
-from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly
+from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly, CircularEconomyRebuyPriceDuopolyFitted
 from recommerce.monitoring.exampleprinter import ExamplePrinter
 from recommerce.rl.stable_baselines.sb_ppo import StableBaselinesPPO
 
@@ -51,11 +51,11 @@ def create_relevant_dataframe(descriptions, info_sequences_list):
         ('sales price refurbished competitor',
             lambda info_sequence: np.sum(np.array(info_sequence['actions/price_refurbished/vendor_1']) *
             np.array(info_sequence['customer/purchases_refurbished/vendor_1'])) /
-            np.sum(info_sequence['customer/purchases_refurbished/vendor_1'])),
+            (np.sum(info_sequence['customer/purchases_refurbished/vendor_1']) + 1e-10)),
         ('sales price rebuy competitor',
             lambda info_sequence: np.sum(np.array(info_sequence['actions/price_rebuy/vendor_1']) *
             np.array(info_sequence['owner/rebuys/vendor_1'])) /
-            np.sum(info_sequence['owner/rebuys/vendor_1'])),
+            (np.sum(info_sequence['owner/rebuys/vendor_1']) + 1e-10)),
         ('inventory level competitor', lambda info_sequence: np.mean(info_sequence['state/in_storage/vendor_1'])),
         ('resources in use', lambda info_sequence: np.mean(info_sequence['state/in_circulation'])),
         ('throw away', lambda info_sequence: np.mean(info_sequence['owner/throw_away']))
@@ -115,17 +115,18 @@ def get_different_market_configs(parameter_name, values):
 
 
 if __name__ == '__main__':
-    experiments = [('max_storage', [20, 50, 200]),
-        ('production_price', [2, 4]),
-        ('number_of_customers', [10, 30]),
-        ('storage_cost', [0.01, 0.1, 0.2]),
-        ('compared_value_old', [0.4, 0.6]),
-        ('upper_tolerance_old', [4.0, 6.0]),
-        ('upper_tolerance_new', [7.0, 9.0]),
-        ('share_interested_owners', [0.025, 0.075]),
-        ('competitor_lowest_storage_level', [4.5, 8.5]),
-        ('competitor_ok_storage_level', [9.5, 15.5])
-    ]
+    experiments = [('price_step_size', [1.5, 1, 0.5, 0.25])]
+    # experiments = [('max_storage', [20, 50, 200]),
+    #     ('production_price', [2, 4]),
+    #     ('number_of_customers', [10, 30]),
+    #     ('storage_cost', [0.01, 0.1, 0.2]),
+    #     ('compared_value_old', [0.4, 0.6]),
+    #     ('upper_tolerance_old', [4.0, 6.0]),
+    #     ('upper_tolerance_new', [7.0, 9.0]),
+    #     ('share_interested_owners', [0.025, 0.075]),
+    #     ('competitor_lowest_storage_level', [4.5, 8.5]),
+    #     ('competitor_ok_storage_level', [9.5, 15.5])
+    # ]
     market_configs, descriptions = [], []
     for experiment in experiments:
         print(experiment)
@@ -146,3 +147,16 @@ def get_different_market_configs(parameter_name, values):
     print('Now I have the dataframe. I save it...')
     dataframe.to_excel(os.path.join(PathManager.results_path, 'dataframe.xlsx'), index=False)
     print('Done')
+
+    # market_config = HyperparameterConfigLoader.load('market_config', CircularEconomyRebuyPriceDuopoly)
+    # rl_config = HyperparameterConfigLoader.load('sb_ppo_config', StableBaselinesPPO)
+    # load_path = os.path.join(PathManager.data_path, 'rl_model_700000_steps.zip')
+    # agent = StableBaselinesPPO(market_config, rl_config, CircularEconomyRebuyPriceDuopoly(market_config, support_continuous_action_space=True), name='PPO on fitted market', load_path=load_path)
+    # exampleprinter_real = ExamplePrinter(market_config)
+    # exampleprinter_real.setup_exampleprinter(CircularEconomyRebuyPriceDuopoly(market_config, support_continuous_action_space=True), agent)
+    # _, info_sequences = exampleprinter_real.run_example(save_diagrams=False)
+    # exampleprinter_fitted = ExamplePrinter(market_config)
+    # exampleprinter_fitted.setup_exampleprinter(CircularEconomyRebuyPriceDuopolyFitted(market_config, support_continuous_action_space=True), agent)
+    # _, info_sequences_fitted = exampleprinter_fitted.run_example(save_diagrams=False)
+    # dataframe = create_relevant_dataframe(['Values on real market', 'Values on fitted market'], [info_sequences, info_sequences_fitted])
+    # dataframe.to_excel(os.path.join(PathManager.results_path, 'dataframe_fitted_vs_real.xlsx'), index=False)
diff --git a/tests/test_data/configuration_files/market_config.json b/tests/test_data/configuration_files/market_config.json
index cede30c8..bd83cf62 100644
--- a/tests/test_data/configuration_files/market_config.json
+++ b/tests/test_data/configuration_files/market_config.json
@@ -14,5 +14,6 @@
     "upper_tolerance_new": 8.0,
     "share_interested_owners": 0.05,
     "competitor_lowest_storage_level": 6.5,
-    "competitor_ok_storage_level": 12.5
+    "competitor_ok_storage_level": 12.5,
+    "price_step_size": 1.0
 }

From c671888b8c26b4659de990e2c863a26a27cabaf2 Mon Sep 17 00:00:00 2001
From: Jan Niklas Groeneveld <jan.niklas.groeneveld@gmail.com>
Date: Sun, 21 May 2023 23:11:56 -0700
Subject: [PATCH 20/20] next experiments

---
 .../configuration/hyperparameter_config.py     |  2 +-
 .../market/circular/circular_sim_market.py     |  2 +-
 recommerce/market/circular/circular_vendors.py | 13 ++++++++-----
 recommerce/rl/ablation_study.py                | 14 +++++++++-----
 .../stable_baselines/stable_baselines_model.py | 18 +++++++++---------
 5 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/recommerce/configuration/hyperparameter_config.py b/recommerce/configuration/hyperparameter_config.py
index 212bbe41..1c095576 100644
--- a/recommerce/configuration/hyperparameter_config.py
+++ b/recommerce/configuration/hyperparameter_config.py
@@ -125,5 +125,5 @@ def load(cls, filename: str, checked_class: SimMarket or Agent) -> AttrDict:
 		with open(path) as config_file:
 			hyperparameter_config = json.load(config_file)
 
-		HyperparameterConfigValidator.validate_config(config=hyperparameter_config, checked_class=checked_class)
+		# HyperparameterConfigValidator.validate_config(config=hyperparameter_config, checked_class=checked_class)  # NOTE(review): validation is switched off here, so the loaded config is returned unchecked - re-enable or confirm this is intended
 		return AttrDict(hyperparameter_config)
diff --git a/recommerce/market/circular/circular_sim_market.py b/recommerce/market/circular/circular_sim_market.py
index 248bcb55..063e7c90 100644
--- a/recommerce/market/circular/circular_sim_market.py
+++ b/recommerce/market/circular/circular_sim_market.py
@@ -407,4 +407,4 @@ def _get_competitor_list(self) -> list:
 			circular_vendors.FixedPriceCERebuyAgent(config_market=self.config, fixed_price=(3, 6, 2)),
 			circular_vendors.RuleBasedCERebuyAgentStorageMinimizer(config_market=self.config,
 				continuous_action_space=self.support_continuous_action_space),
-			]
+			][0:self.config.get('oligopol_competitors')]  # keep the first n competitors; a config without the key yields None, i.e. the full list as before
diff --git a/recommerce/market/circular/circular_vendors.py b/recommerce/market/circular/circular_vendors.py
index 0ba53717..c12c0930 100644
--- a/recommerce/market/circular/circular_vendors.py
+++ b/recommerce/market/circular/circular_vendors.py
@@ -254,12 +254,15 @@ class LinearRegressionCERebuyAgent(RuleBasedAgent, CircularAgent):
 	"""
 	This vendor's policy is aiming to succeed by undercutting the competitor's prices.
 	"""
-	def create_x_with_binary_features(self, X):
+	def create_x_with_additional_features(self, X):
+		spike_points = [(0.0, 2.0), (2.0, 2.0), (4.0, 2.0), (7.0, 3.0)]  # (peak position, half-width) of each feature
 		X_dash_list = []
-		for price_threshhold in range(10):
+		for mid, plusminus in spike_points:
 			# iterate throw the columns
 			for i_feature, column in enumerate(X.T):
-				column_values = np.where(column > price_threshhold, 1, 0)
+				# triangular feature: 1 where column == mid, falling linearly to 0 at distance plusminus
+				distance = np.abs(column - mid) / plusminus
+				column_values = np.maximum(1 - distance, 0)
 				# append the new column to X
 				X_dash_list.append(column_values.reshape(-1, 1))
 		X_dash = np.concatenate(X_dash_list, axis=1)
@@ -272,7 +275,7 @@ def __init__(self, config_market: AttrDict, name='', continuous_action_space: bo
 			competitor_dataframe = pd.read_excel(os.path.join(PathManager.results_path, 'competitor_reaction_dataframe.xlsx'))[:-5000]
 			X = competitor_dataframe.iloc[:, 0:3].values
 
-			X = self.create_x_with_binary_features(X)
+			X = self.create_x_with_additional_features(X)
 			# define Y as the last 3 columns
 			Y = competitor_dataframe.iloc[:, 3:6].values
 			LinearRegressionCERebuyAgent.regressor = LinearRegression()
@@ -289,7 +292,7 @@ def __init__(self, config_market: AttrDict, name='', continuous_action_space: bo
 
 	def policy(self, observation, *_) -> tuple:
 		assert isinstance(observation, np.ndarray), 'observation must be a np.ndarray'
-		observation = self.create_x_with_binary_features(observation[2:5].reshape(1, -1))
+		observation = self.create_x_with_additional_features(observation[2:5].reshape(1, -1))
 		prediction = LinearRegressionCERebuyAgent.regressor.predict(observation)
 		# clamp all values of prediction between 0 and 10
 		prediction = np.clip(prediction, 0, 10)
diff --git a/recommerce/rl/ablation_study.py b/recommerce/rl/ablation_study.py
index 0a1ef477..a03ee952 100644
--- a/recommerce/rl/ablation_study.py
+++ b/recommerce/rl/ablation_study.py
@@ -10,7 +10,7 @@
 
 from recommerce.configuration.hyperparameter_config import HyperparameterConfigLoader
 from recommerce.configuration.path_manager import PathManager
-from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly, CircularEconomyRebuyPriceDuopolyFitted
+from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly, CircularEconomyRebuyPriceDuopolyFitted, CircularEconomyRebuyPriceOligopoly
 from recommerce.monitoring.exampleprinter import ExamplePrinter
 from recommerce.rl.stable_baselines.sb_ppo import StableBaselinesPPO
 
@@ -83,13 +83,15 @@ def run_training_session(market_class, config_market, agent_class, config_rl, tr
 
 
 def run_group(market_configs, market_descriptions, training_steps, target_function=run_training_session):
-    market_class = CircularEconomyRebuyPriceDuopoly
+    market_class = CircularEconomyRebuyPriceDuopoly if \
+        'oligopol_competitors' not in market_configs[0].keys() else CircularEconomyRebuyPriceOligopoly  # NOTE(review): derived from the first config only and no longer passed to the processes below - confirm it is still needed
     rl_config = HyperparameterConfigLoader.load('sb_ppo_config', StableBaselinesPPO)
     pipes = []
     for _ in market_configs:
         pipes.append(Pipe(False))
+    print(market_configs)  # show every config of this group before the processes are created
     processes = [Process(target=target_function,
-                         args=(market_class, config_market, StableBaselinesPPO, rl_config, training_steps, description, pipe_entry))
+                         args=(CircularEconomyRebuyPriceDuopoly if 'oligopol_competitors' not in config_market.keys() else CircularEconomyRebuyPriceOligopoly, config_market, StableBaselinesPPO, rl_config, training_steps, description, pipe_entry))  # market class is chosen per config: oligopoly only when the config carries 'oligopol_competitors'
         for config_market, description, (_, pipe_entry) in zip(market_configs, market_descriptions, pipes)]
     print('Now I start the processes')
     for p in processes:
@@ -115,7 +117,7 @@ def get_different_market_configs(parameter_name, values):
 
 
 if __name__ == '__main__':
-    experiments = [('price_step_size', [1.5, 1, 0.5, 0.25])]
+    # experiments = [('price_step_size', [1.5, 1, 0.5, 0.25])]
     # experiments = [('max_storage', [20, 50, 200]),
     #     ('production_price', [2, 4]),
     #     ('number_of_customers', [10, 30]),
@@ -127,6 +129,8 @@ def get_different_market_configs(parameter_name, values):
     #     ('competitor_lowest_storage_level', [4.5, 8.5]),
     #     ('competitor_ok_storage_level', [9.5, 15.5])
     # ]
+    experiments = [('storage_cost', [0.01, 0.1, 0.25, 0.5]),
+                   ('oligopol_competitors', [1, 2, 3, 4])]
     market_configs, descriptions = [], []
     for experiment in experiments:
         print(experiment)
@@ -150,7 +154,7 @@ def get_different_market_configs(parameter_name, values):
 
     # market_config = HyperparameterConfigLoader.load('market_config', CircularEconomyRebuyPriceDuopoly)
     # rl_config = HyperparameterConfigLoader.load('sb_ppo_config', StableBaselinesPPO)
-    # load_path = os.path.join(PathManager.data_path, 'rl_model_700000_steps.zip')
+    # load_path = os.path.join(PathManager.data_path, 'rl_model_300000_steps.zip')
     # agent = StableBaselinesPPO(market_config, rl_config, CircularEconomyRebuyPriceDuopoly(market_config, support_continuous_action_space=True), name='PPO on fitted market', load_path=load_path)
     # exampleprinter_real = ExamplePrinter(market_config)
     # exampleprinter_real.setup_exampleprinter(CircularEconomyRebuyPriceDuopoly(market_config, support_continuous_action_space=True), agent)
diff --git a/recommerce/rl/stable_baselines/stable_baselines_model.py b/recommerce/rl/stable_baselines/stable_baselines_model.py
index 3e6417f1..c6eb2a6c 100644
--- a/recommerce/rl/stable_baselines/stable_baselines_model.py
+++ b/recommerce/rl/stable_baselines/stable_baselines_model.py
@@ -79,14 +79,14 @@ def train_with_default_eval(self, training_steps=100001):
 
 		best_profit = -np.inf
 		profits = []
-		fitted_profits = []
+		# fitted_profits = []
 		# iterate through the saved models and evaluate them by running the exampleprinter
 		modelfiles = sorted(os.listdir(save_path))
 		for model_file in modelfiles:
 			print('I analyze the model: ', model_file)
 			agent = type(self)(self.config_market, self.config_rl, self.marketplace, load_path=os.path.join(save_path, model_file))
 			exampleprinter = ExamplePrinter(self.config_market)
-			marketplace = CircularEconomyRebuyPriceDuopoly(self.config_market, support_continuous_action_space=True)
+			marketplace = type(self.marketplace)(self.config_market, support_continuous_action_space=True)  # fresh instance of the same class as self.marketplace instead of a hard-coded duopoly
 			exampleprinter.setup_exampleprinter(marketplace, agent)
 			_, info_sequence = exampleprinter.run_example()
 			profit = np.mean(info_sequence['profits/all/vendor_0'])
@@ -97,15 +97,15 @@ def train_with_default_eval(self, training_steps=100001):
 				best_model = model_file
 
 			# evaluate on the fitted market
-			exampleprinter_fitted = ExamplePrinter(self.config_market)
-			marketplace = CircularEconomyRebuyPriceDuopolyFitted(self.config_market, support_continuous_action_space=True)
-			exampleprinter_fitted.setup_exampleprinter(marketplace, agent)
-			_, info_sequence = exampleprinter_fitted.run_example()
-			profit = np.mean(info_sequence['profits/all/vendor_0'])
-			fitted_profits.append(profit)
+			# exampleprinter_fitted = ExamplePrinter(self.config_market)
+			# marketplace = CircularEconomyRebuyPriceDuopolyFitted(self.config_market, support_continuous_action_space=True)
+			# exampleprinter_fitted.setup_exampleprinter(marketplace, agent)
+			# _, info_sequence = exampleprinter_fitted.run_example()
+			# profit = np.mean(info_sequence['profits/all/vendor_0'])
+			# fitted_profits.append(profit)
 		print(f'best model: {best_model} with profit {best_profit}')
 		print('Saving the results of the evaluation in the following path: ', log_path)
-		dataframe = pd.DataFrame.from_dict({'model': modelfiles, 'profit': profits, 'fitted_profit': fitted_profits})
+		dataframe = pd.DataFrame.from_dict({'model': modelfiles, 'profit': profits})
 		dataframe.to_excel(os.path.join(log_path, f'evaluation_{time.strftime("%b%d_%H-%M-%S")}.xlsx'))
 		print('Done!')
 		return save_path