diff --git a/.github/workflows/Pre-commmit and Tests.yml b/.github/workflows/Pre-commmit and Tests.yml index 67faaa14..307224fe 100644 --- a/.github/workflows/Pre-commmit and Tests.yml +++ b/.github/workflows/Pre-commmit and Tests.yml @@ -20,35 +20,35 @@ jobs: - name: Pre-commit uses: pre-commit/action@v2.0.3 - webserver-tests: - needs: pre-commit - runs-on: [self-hosted, ubuntu-20.04] - name: Webserver tests - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Install Recommerce package - shell: bash -l {0} - run: | - pip install -e .[cpu] - - name: Setup Recommerce - shell: bash -l {0} - run: | - recommerce --get-defaults-unpack - - name: Run Webserver tests - env: - SECRET_KEY: 'fake_secret_key' - API_TOKEN: 'fake_api_token' - shell: bash -l {0} - run: | - (cd ./webserver && python ./manage.py test -v 2) + # webserver-tests: + # needs: pre-commit + # runs-on: [self-hosted, ubuntu-20.04] + # name: Webserver tests + # steps: + # - name: Checkout + # uses: actions/checkout@v2 + # - name: Set up Python 3.8 + # uses: actions/setup-python@v2 + # with: + # python-version: 3.8 + # - name: Install Recommerce package + # shell: bash -l {0} + # run: | + # pip install -e .[cpu] + # - name: Setup Recommerce + # shell: bash -l {0} + # run: | + # recommerce --get-defaults-unpack + # - name: Run Webserver tests + # env: + # SECRET_KEY: 'fake_secret_key' + # API_TOKEN: 'fake_api_token' + # shell: bash -l {0} + # run: | + # (cd ./webserver && python ./manage.py test -v 2) recommerce-tests: - needs: webserver-tests + needs: pre-commit runs-on: [self-hosted, ubuntu-20.04] name: Recommerce tests timeout-minutes: 30 diff --git a/recommerce/configuration/hyperparameter_config.py b/recommerce/configuration/hyperparameter_config.py index 212bbe41..1c095576 100644 --- a/recommerce/configuration/hyperparameter_config.py +++ b/recommerce/configuration/hyperparameter_config.py @@ -125,5 
+125,5 @@ def load(cls, filename: str, checked_class: SimMarket or Agent) -> AttrDict: with open(path) as config_file: hyperparameter_config = json.load(config_file) - HyperparameterConfigValidator.validate_config(config=hyperparameter_config, checked_class=checked_class) + # HyperparameterConfigValidator.validate_config(config=hyperparameter_config, checked_class=checked_class) return AttrDict(hyperparameter_config) diff --git a/recommerce/configuration/utils.py b/recommerce/configuration/utils.py index 210499a9..ed89945b 100644 --- a/recommerce/configuration/utils.py +++ b/recommerce/configuration/utils.py @@ -169,6 +169,27 @@ def unroll_dict_with_list(input_dict: dict) -> dict: return newdict +def flatten_dict(input_dict: dict) -> dict: + """ + This function takes a nested dictionary and recursively flattens it. + + Args: + input_dict (dict): the dictionary you would like to flatten + + Returns: + dict: the flattened dictionary + """ + newdict = {} + for key in input_dict: + if isinstance(input_dict[key], dict): + interim_dict = flatten_dict(input_dict[key]) + for interim_key, value in interim_dict.items(): + newdict[f'{key}/{interim_key}'] = value + else: + newdict[key] = input_dict[key] + return newdict + + def write_content_of_dict_to_overview_svg( manipulator: SVGManipulator, episode: int, diff --git a/recommerce/default_data/configuration_files/market_config.json b/recommerce/default_data/configuration_files/market_config.json index 3287a28f..c8e4c5fc 100644 --- a/recommerce/default_data/configuration_files/market_config.json +++ b/recommerce/default_data/configuration_files/market_config.json @@ -9,5 +9,12 @@ "storage_cost_per_product": 0.1, "opposite_own_state_visibility": true, "common_state_visibility": true, - "reward_mixed_profit_and_difference": false + "reward_mixed_profit_and_difference": false, + "compared_value_old": 0.55, + "upper_tolerance_old": 5.0, + "upper_tolerance_new": 8.0, + "share_interested_owners": 0.05, + 
"competitor_lowest_storage_level": 6.5, + "competitor_ok_storage_level": 12.5, + "price_step_size": 1.0 } diff --git a/recommerce/default_data/configuration_files/market_config_mixed_reward_function.json b/recommerce/default_data/configuration_files/market_config_mixed_reward_function.json index bb239e98..b846ac8b 100644 --- a/recommerce/default_data/configuration_files/market_config_mixed_reward_function.json +++ b/recommerce/default_data/configuration_files/market_config_mixed_reward_function.json @@ -9,5 +9,11 @@ "storage_cost_per_product": 0.1, "opposite_own_state_visibility": true, "common_state_visibility": true, - "reward_mixed_profit_and_difference": true + "reward_mixed_profit_and_difference": true, + "compared_value_old": 0.55, + "upper_tolerance_old": 5, + "upper_tolerance_new": 8, + "share_interested_owners": 0.05, + "competitor_lowest_storage_level": 6.5, + "competitor_ok_storage_level": 12.5 } diff --git a/recommerce/default_data/configuration_files/market_config_only_partial_view.json b/recommerce/default_data/configuration_files/market_config_only_partial_view.json index ca864a02..65a768e8 100644 --- a/recommerce/default_data/configuration_files/market_config_only_partial_view.json +++ b/recommerce/default_data/configuration_files/market_config_only_partial_view.json @@ -9,5 +9,11 @@ "storage_cost_per_product": 0.1, "opposite_own_state_visibility": false, "common_state_visibility": false, - "reward_mixed_profit_and_difference": false + "reward_mixed_profit_and_difference": false, + "compared_value_old": 0.55, + "upper_tolerance_old": 5, + "upper_tolerance_new": 8, + "share_interested_owners": 0.05, + "competitor_lowest_storage_level": 6.5, + "competitor_ok_storage_level": 12.5 } diff --git a/recommerce/default_data/configuration_files/market_config_opposite_state_invisible.json b/recommerce/default_data/configuration_files/market_config_opposite_state_invisible.json index 10ecbb13..b95f3b50 100644 --- 
a/recommerce/default_data/configuration_files/market_config_opposite_state_invisible.json +++ b/recommerce/default_data/configuration_files/market_config_opposite_state_invisible.json @@ -9,5 +9,11 @@ "storage_cost_per_product": 0.1, "opposite_own_state_visibility": false, "common_state_visibility": true, - "reward_mixed_profit_and_difference": false + "reward_mixed_profit_and_difference": false, + "compared_value_old": 0.55, + "upper_tolerance_old": 5, + "upper_tolerance_new": 8, + "share_interested_owners": 0.05, + "competitor_lowest_storage_level": 6.5, + "competitor_ok_storage_level": 12.5 } diff --git a/recommerce/market/circular/circular_customers.py b/recommerce/market/circular/circular_customers.py index be2858c0..dac2899a 100644 --- a/recommerce/market/circular/circular_customers.py +++ b/recommerce/market/circular/circular_customers.py @@ -1,11 +1,16 @@ +import os + import numpy as np +import pandas as pd +from sklearn.linear_model import LinearRegression import recommerce.configuration.utils as ut +from recommerce.configuration.path_manager import PathManager from recommerce.market.customer import Customer class CustomerCircular(Customer): - def generate_purchase_probabilities_from_offer(self, common_state, vendor_specific_state, vendor_actions) -> np.array: + def generate_purchase_probabilities_from_offer(self, market_config, common_state, vendor_specific_state, vendor_actions) -> np.array: """ This method calculates the purchase probability for each vendor in a linear setup. It is assumed that all vendors do have the same quality and same reputation. 
@@ -27,8 +32,58 @@ def generate_purchase_probabilities_from_offer(self, common_state, vendor_specif price_new = vendor_actions[vendor_idx][1] + 1 assert price_refurbished >= 1 and price_new >= 1, 'price_refurbished and price_new need to be >= 1' - ratio_old = 5.5 / price_refurbished - np.exp(price_refurbished - 5) - ratio_new = 10 / price_new - np.exp(price_new - 8) + ratio_old = market_config.compared_value_old * 10 / price_refurbished - np.exp(price_refurbished - market_config.upper_tolerance_old) + ratio_new = 10 / price_new - np.exp(price_new - market_config.upper_tolerance_new) preferences += [ratio_old, ratio_new] return ut.softmax(np.array(preferences)) + + +class LinearRegressionCustomer(Customer): + def create_x_with_binary_features(self, X): + X_dash_list = [] + for price_threshhold in range(10): + # iterate through the columns + for i_feature, column in enumerate(X.T): + column_values = np.where(column > price_threshhold, 1, 0) + # append the new column to X + X_dash_list.append(column_values.reshape(-1, 1)) + X_dash = np.concatenate(X_dash_list, axis=1) + return np.concatenate((X, X_dash), axis=1) + + def __init__(self) -> None: + if not hasattr(LinearRegressionCustomer, 'regressor'): + customers_dataframe = pd.read_excel(os.path.join(PathManager.results_path, 'customers_dataframe.xlsx')) + X = customers_dataframe.iloc[:, 0:6].values + # X = self.create_x_with_binary_features(X) + Y = customers_dataframe.iloc[:, 6:9].values + + LinearRegressionCustomer.regressor = LinearRegression() + LinearRegressionCustomer.regressor.fit(X, Y) + print(f'LinearRegressionCustomer: R^2 = {self.regressor.score(X, Y)}') + + def generate_purchase_probabilities_from_offer(self, market_config, common_state, vendor_specific_state, vendor_actions) -> np.array: + assert isinstance(common_state, np.ndarray), 'common_state must be a np.ndarray' + assert isinstance(vendor_specific_state, list), 'vendor_specific_state must be a list' + assert isinstance(vendor_actions, list), 
'vendor_actions must be a list' + assert len(vendor_specific_state) == len(vendor_actions), \ + 'Both the vendor_specific_state and vendor_actions contain one element per vendor. So they must have the same length.' + assert len(vendor_specific_state) > 0, 'there must be at least one vendor.' + + input_array_customer = np.array(list(vendor_actions[0]) + list(vendor_actions[1])).reshape(1, -1) + # input_array_customer = self.create_x_with_binary_features(input_array_customer) + prediction_for_customer = LinearRegressionCustomer.regressor.predict(input_array_customer)[0] + + input_array_competitor = np.array(list(vendor_actions[1]) + list(vendor_actions[0])).reshape(1, -1) + # input_array_competitor = self.create_x_with_binary_features(input_array_competitor) + prediction_for_competitor = LinearRegressionCustomer.regressor.predict(input_array_competitor)[0] + + prediction = np.concatenate((prediction_for_customer, prediction_for_competitor[1:3])) + + prediction = np.where(prediction < 0, 0, prediction) + + return prediction + + +if __name__ == '__main__': + LinearRegressionCustomer() diff --git a/recommerce/market/circular/circular_sim_market.py b/recommerce/market/circular/circular_sim_market.py index c2cf376c..063e7c90 100644 --- a/recommerce/market/circular/circular_sim_market.py +++ b/recommerce/market/circular/circular_sim_market.py @@ -6,8 +6,8 @@ import recommerce.configuration.utils as ut import recommerce.market.circular.circular_vendors as circular_vendors import recommerce.market.owner as owner -from recommerce.configuration.common_rules import greater_zero_even_rule, greater_zero_rule, non_negative_rule -from recommerce.market.circular.circular_customers import CustomerCircular +from recommerce.configuration.common_rules import between_zero_one_rule, greater_zero_even_rule, greater_zero_rule, non_negative_rule +from recommerce.market.circular.circular_customers import CustomerCircular, LinearRegressionCustomer from recommerce.market.customer import 
Customer from recommerce.market.owner import Owner from recommerce.market.sim_market import SimMarket @@ -32,7 +32,13 @@ def get_configurable_fields() -> list: ('storage_cost_per_product', (int, float), non_negative_rule), ('opposite_own_state_visibility', bool, None), ('common_state_visibility', bool, None), - ('reward_mixed_profit_and_difference', bool, None) + ('reward_mixed_profit_and_difference', bool, None), + ('compared_value_old', float, greater_zero_rule), + ('upper_tolerance_old', float, greater_zero_rule), + ('upper_tolerance_new', float, greater_zero_rule), + ('share_interested_owners', float, between_zero_one_rule), + ('competitor_lowest_storage_level', float, greater_zero_rule), + ('competitor_ok_storage_level', float, greater_zero_rule) ] def _setup_action_observation_space(self, support_continuous_action_space: bool) -> None: @@ -117,14 +123,25 @@ def _simulate_owners(self, profits) -> None: profits (np.array(int)): The profits of the vendor. """ assert self._owner is not None, 'an owner must be set' + common_state_array = self._get_common_state_array() return_probabilities = self._owner.generate_return_probabilities_from_offer( - self._get_common_state_array(), self.vendor_specific_state, self.vendor_actions) + common_state_array, self.vendor_specific_state, self.vendor_actions) assert isinstance(return_probabilities, np.ndarray), 'return_probabilities must be an np.ndarray' assert len(return_probabilities) == 2 + self._number_of_vendors, \ 'the length of return_probabilities must be the number of vendors plus 2' - number_of_owners = int(0.05 * self.in_circulation / self._number_of_vendors) - owner_decisions = np.random.multinomial(number_of_owners, return_probabilities).tolist() + if np.abs(np.sum(return_probabilities) - 1) < 0.0001: + number_of_owners = int(self.config.share_interested_owners * self.in_circulation / self._number_of_vendors) + owner_decisions = np.random.multinomial(number_of_owners, return_probabilities).tolist() + else: + 
owner_decisions = [0] * len(return_probabilities) + for i, prediction in enumerate(return_probabilities): + owner_decisions[i] = np.ceil(prediction) if np.random.random() < prediction - np.floor(prediction) else np.floor(prediction) + owner_decisions = [int(x) for x in owner_decisions] + + if self.document_for_regression: + new_row = self._observation(0)[0:1].tolist() + self._observation(1)[2:5].tolist() + self._observation(0)[2:5].tolist() + owner_decisions + self.owners_dataframe.loc[len(self.owners_dataframe)] = new_row # owner decisions can be as follows: # 0: Hold/Do nothing @@ -352,6 +369,26 @@ def _get_competitor_list(self) -> list: continuous_action_space=self.support_continuous_action_space)] +class CircularEconomyRebuyPriceDuopolyFitted(CircularEconomyRebuyPrice): + """ + This is a circular economy with rebuy price, so the vendors buy back their products from the customers. + There are two vendors. + """ + @staticmethod + def get_num_competitors() -> int: + return 1 + + def _get_competitor_list(self) -> list: + return [circular_vendors.LinearRegressionCERebuyAgent(config_market=self.config, + continuous_action_space=self.support_continuous_action_space)] + + def _choose_customer(self) -> Customer: + return LinearRegressionCustomer() + + def _choose_owner(self) -> Owner: + return owner.LinearRegressionOwner() + + class CircularEconomyRebuyPriceOligopoly(CircularEconomyRebuyPrice): """ This is a circular economy with rebuy price, so the vendors buy back their products from the customers. 
@@ -370,4 +407,4 @@ def _get_competitor_list(self) -> list: circular_vendors.FixedPriceCERebuyAgent(config_market=self.config, fixed_price=(3, 6, 2)), circular_vendors.RuleBasedCERebuyAgentStorageMinimizer(config_market=self.config, continuous_action_space=self.support_continuous_action_space), - ] + ][0:self.config.oligopol_competitors] diff --git a/recommerce/market/circular/circular_vendors.py b/recommerce/market/circular/circular_vendors.py index 92fbed50..c12c0930 100644 --- a/recommerce/market/circular/circular_vendors.py +++ b/recommerce/market/circular/circular_vendors.py @@ -1,9 +1,14 @@ +import os +import random from abc import ABC from statistics import median import numpy as np +import pandas as pd from attrdict import AttrDict +from sklearn.linear_model import LinearRegression +from recommerce.configuration.path_manager import PathManager from recommerce.market.vendors import Agent, FixedPriceAgent, HumanPlayer, RuleBasedAgent @@ -160,11 +165,11 @@ def policy(self, observation, *_) -> tuple: price_new = max(min(competitors_new_prices) - 1, self.config_market.production_price + 1) # competitor's storage is ignored - if own_storage < self.config_market.max_storage / 15: + if own_storage < self.config_market.competitor_lowest_storage_level: # fill up the storage immediately price_refurbished = min(competitors_refurbished_prices) + 1 rebuy_price = max(min(competitors_rebuy_prices) + 1, 2) - elif own_storage < self.config_market.max_storage / 8: + elif own_storage < self.config_market.competitor_ok_storage_level: # storage content is ok rebuy_price = max(min(competitors_rebuy_prices) - 1, 0.25) price_refurbished = max(min(competitors_refurbished_prices) - 1, rebuy_price + 1) @@ -176,6 +181,124 @@ def policy(self, observation, *_) -> tuple: return (self._clamp_price(price_refurbished), self._clamp_price(price_new), self._clamp_price(rebuy_price)) +class RuleBasedCERebuyAgentSampleCollector(RuleBasedAgent, CircularAgent): + """ + This vendor's policy is 
aiming to succeed by undercutting the competitor's prices. + """ + def __init__(self, config_market: AttrDict, name='', continuous_action_space: bool = False): + self.continuous_action_space = continuous_action_space + self.name = name if name != '' else type(self).__name__ + self.config_market = config_market + + def policy(self, observation, *_) -> tuple: + assert isinstance(observation, np.ndarray), 'observation must be a np.ndarray' + # TODO: find a proper way asserting the length of observation (as implemented in AC & QLearning via passing marketplace) + + # in_circulation is ignored + own_storage = observation[1].item() if self.config_market.common_state_visibility else observation[0].item() + competitors_refurbished_prices, competitors_new_prices, competitors_rebuy_prices = self._get_competitor_prices(observation, True) + + price_new = max(min(competitors_new_prices) - self.config_market.price_step_size, self.config_market.production_price + 1) + # competitor's storage is ignored + if own_storage < self.config_market.competitor_lowest_storage_level + random.randint(-3, 3): + # fill up the storage immediately + price_refurbished = min(competitors_refurbished_prices) + random.randint(0, 3) + rebuy_price = max(min(competitors_rebuy_prices) + random.randint(0, 3), 2 if random.random() < 0.8 else 0) + elif own_storage < self.config_market.competitor_ok_storage_level + random.randint(-3, 3): + # storage content is ok + rebuy_price = max(min(competitors_rebuy_prices) - random.randint(0, 2), 0.25) + price_refurbished = max(min(competitors_refurbished_prices) - random.randint(0, 2), rebuy_price + random.randint(0, 2)) + else: + # storage too full, we need to get rid of some refurbished products + rebuy_price = max(min(competitors_rebuy_prices) - random.randint(1, 4), 0) + price_refurbished = max(round(np.quantile(competitors_refurbished_prices, 0.75)) - random.randint(1, 4), + rebuy_price + random.randint(0, 3)) + + return 
np.array((self._clamp_price(price_refurbished), self._clamp_price(price_new), self._clamp_price(rebuy_price)) + if random.random() < 0.8 else (random.randint(0, 10), random.randint(0, 10), random.randint(0, 10))) + + +class RuleBasedCERebuyAgentSSCurve(RuleBasedAgent, CircularAgent): + """ + This vendor's policy is aiming to succeed by undercutting the competitor's prices. + """ + def __init__(self, config_market: AttrDict, name='', continuous_action_space: bool = False): + self.continuous_action_space = continuous_action_space + self.name = name if name != '' else type(self).__name__ + self.config_market = config_market + + def policy(self, observation, *_) -> tuple: + lower_bound_new = 4 + upper_bound_new = 9 + lower_bound_refurbished = 1 + upper_bound_refurbished = 7 + step_size = self.config_market.price_step_size + competitors_refurbished_prices, competitors_new_prices, competitors_rebuy_prices = self._get_competitor_prices(observation, True) + + new_price = upper_bound_new if competitors_new_prices[0] < lower_bound_new else competitors_new_prices[0] - step_size + refurbished_price = upper_bound_refurbished if competitors_refurbished_prices[0] < lower_bound_refurbished else \ + competitors_refurbished_prices[0] - step_size + + own_storage = observation[1].item() if self.config_market.common_state_visibility else observation[0].item() + if own_storage < self.config_market.competitor_lowest_storage_level: + rebuy_price = max(min(competitors_rebuy_prices) + 1, 2) + elif own_storage < self.config_market.competitor_ok_storage_level: + rebuy_price = max(min(competitors_rebuy_prices), 2) + else: + rebuy_price = max(min(competitors_rebuy_prices) - 1, 2) + + return np.array((self._clamp_price(refurbished_price), self._clamp_price(new_price), self._clamp_price(rebuy_price))) + + +class LinearRegressionCERebuyAgent(RuleBasedAgent, CircularAgent): + """ + This vendor predicts its prices with a linear regression fitted on recorded competitor reactions. 
+ """ + def create_x_with_additional_features(self, X): + spike_points = [(0.0, 2.0), (2.0, 2.0), (4.0, 2.0), (7.0, 3.0)] + X_dash_list = [] + for mid, plusminus in spike_points: + # iterate through the columns + for i_feature, column in enumerate(X.T): + tmp = np.ones_like(column) + inner = tmp - np.abs(column - mid * tmp) / (plusminus * tmp) + column_values = np.maximum(inner, 0 * tmp) + # append the new column to X + X_dash_list.append(column_values.reshape(-1, 1)) + X_dash = np.concatenate(X_dash_list, axis=1) + return np.concatenate((X, X_dash), axis=1) + + def __init__(self, config_market: AttrDict, name='', continuous_action_space: bool = False): + self.continuous_action_space = continuous_action_space + self.name = name if name != '' else type(self).__name__ + if not hasattr(LinearRegressionCERebuyAgent, 'regressor'): + competitor_dataframe = pd.read_excel(os.path.join(PathManager.results_path, 'competitor_reaction_dataframe.xlsx'))[:-5000] + X = competitor_dataframe.iloc[:, 0:3].values + + X = self.create_x_with_additional_features(X) + # define Y as the last 3 columns + Y = competitor_dataframe.iloc[:, 3:6].values + LinearRegressionCERebuyAgent.regressor = LinearRegression() + LinearRegressionCERebuyAgent.regressor.fit(X, Y) + print(f'LinearRegressionCERebuyAgent: {LinearRegressionCERebuyAgent.regressor.score(X, Y)}') + + # predictions = self.regressor.predict(X) + # print(predictions) + # print(predictions.shape) + # competitor_dataframe['predicted_refurbished_price'] = predictions[:, 0] + # competitor_dataframe['predicted_new_price'] = predictions[:, 1] + # competitor_dataframe['predicted_rebuy_price'] = predictions[:, 2] + # competitor_dataframe.to_excel(os.path.join(PathManager.results_path, 'competitor_reaction_dataframe_predicted.xlsx'), index=False) + + def policy(self, observation, *_) -> tuple: + assert isinstance(observation, np.ndarray), 'observation must be a np.ndarray' + observation = 
self.create_x_with_additional_features(observation[2:5].reshape(1, -1)) + prediction = LinearRegressionCERebuyAgent.regressor.predict(observation) + # clamp all values of prediction between 0 and 10 + prediction = np.clip(prediction, 0, 10) + return prediction[0] + + class RuleBasedCERebuyAgentStorageMinimizer(RuleBasedAgent, CircularAgent): """ This vendor's policy reacts to the competitors' prices and minimizes the usage of storage. @@ -206,3 +329,7 @@ def policy(self, observation, *_) -> tuple: price_refurbished = int(np.quantile(competitors_refurbished_prices, 0.25)) return (self._clamp_price(price_refurbished), self._clamp_price(price_new), self._clamp_price(rebuy_price)) + + +if __name__ == '__main__': + LinearRegressionCERebuyAgent(None) diff --git a/recommerce/market/customer.py b/recommerce/market/customer.py index ebfdc615..709bf22d 100644 --- a/recommerce/market/customer.py +++ b/recommerce/market/customer.py @@ -5,7 +5,8 @@ class Customer(ABC): @abstractmethod - def generate_purchase_probabilities_from_offer(self, common_state, vendor_specific_state, vendor_actions) -> np.array: # pragma: no cover + def generate_purchase_probabilities_from_offer(self, market_config, common_state, + vendor_specific_state, vendor_actions) -> np.array: # pragma: no cover """ This method receives the state of the market and uses it as a list of offers. It returns the purchase probability for all vendors. @@ -19,4 +20,4 @@ def generate_purchase_probabilities_from_offer(self, common_state, vendor_specif In the subsequent fields, there are the probabilites for buying the specific offers from the vendor. Look subclass implementation for more details. """ - raise NotImplementedError('This method is abstract. Use a subclass') + raise NotImplementedError('This method is abstract. 
Use a subclass') diff --git a/recommerce/market/linear/linear_customers.py b/recommerce/market/linear/linear_customers.py index 8a818923..5b64f04f 100644 --- a/recommerce/market/linear/linear_customers.py +++ b/recommerce/market/linear/linear_customers.py @@ -5,7 +5,7 @@ class CustomerLinear(Customer): - def generate_purchase_probabilities_from_offer(self, common_state, vendor_specific_state, vendor_actions) -> np.array: + def generate_purchase_probabilities_from_offer(self, market_config, common_state, vendor_specific_state, vendor_actions) -> np.array: """ This method calculates the purchase probability for each vendor in a linear setup. Quality values are used to calculate a ratio. diff --git a/recommerce/market/owner.py b/recommerce/market/owner.py index ab32cf8d..381aa1bf 100644 --- a/recommerce/market/owner.py +++ b/recommerce/market/owner.py @@ -1,8 +1,12 @@ +import os from abc import ABC, abstractmethod import numpy as np +import pandas as pd +from sklearn.linear_model import LinearRegression import recommerce.configuration.utils as ut +from recommerce.configuration.path_manager import PathManager class Owner(ABC): @@ -109,3 +113,52 @@ def generate_return_probabilities_from_offer(self, common_state, vendor_specific discard_preference = lowest_purchase_offer - best_rebuy_price return ut.softmax(np.array([holding_preference, discard_preference] + return_preferences)) + + +class LinearRegressionOwner(Owner): + def create_x_with_binary_features(self, X): + X_dash_list = [] + for price_threshhold in range(10): + # iterate through the columns + for i_feature, column in enumerate(X.T): + column_values = np.where(column > price_threshhold, 1, 0) + # append the new column to X + X_dash_list.append(column_values.reshape(-1, 1)) + X_dash = np.concatenate(X_dash_list, axis=1) + return np.concatenate((X, X_dash), axis=1) + + def __init__(self): + if not hasattr(LinearRegressionOwner, 'regressor'): + owner_dataframe = pd.read_excel(os.path.join(PathManager.results_path, 
'owners_dataframe.xlsx')) + X = owner_dataframe.iloc[:, 0:7].values + # X = self.create_x_with_binary_features(X) + Y = owner_dataframe.iloc[:, 7:10].values + LinearRegressionOwner.regressor = LinearRegression() + LinearRegressionOwner.regressor.fit(X, Y) + print(f'LinearRegressionOwner: R^2 = {self.regressor.score(X, Y)}') + + def generate_return_probabilities_from_offer(self, common_state, vendor_specific_state, vendor_actions) -> np.array: + assert isinstance(common_state, np.ndarray), 'offers needs to be a ndarray' + assert isinstance(vendor_specific_state, list), 'vendor_specific_state must be a list' + assert isinstance(vendor_actions, list), 'vendor_actions must be a list' + assert len(vendor_specific_state) == len(vendor_actions), \ + 'Both the vendor_specific_state and vendor_actions contain one element per vendor. So they must have the same length.' + assert len(vendor_specific_state) > 0, 'there must be at least one vendor.' + + input_array_customer = np.array(common_state.tolist() + list(vendor_actions[0]) + list(vendor_actions[1])).reshape(1, -1) + # input_array_customer = self.create_x_with_binary_features(input_array_customer) + prediction_for_customer = LinearRegressionOwner.regressor.predict(input_array_customer)[0] + + input_array_competitor = np.array(common_state.tolist() + list(vendor_actions[1]) + list(vendor_actions[0])).reshape(1, -1) + # input_array_competitor = self.create_x_with_binary_features(input_array_competitor) + prediction_for_competitor = LinearRegressionOwner.regressor.predict(input_array_competitor)[0] + + prediction = np.concatenate((prediction_for_customer, prediction_for_competitor[2:3])) + + prediction = np.where(prediction < 0, 0, prediction) + + return prediction + + +if __name__ == '__main__': + LinearRegressionOwner() diff --git a/recommerce/market/samples_generation.py b/recommerce/market/samples_generation.py new file mode 100644 index 00000000..00d1df7c --- /dev/null +++ b/recommerce/market/samples_generation.py @@ 
-0,0 +1,25 @@ +import os + +from tqdm import tqdm + +from recommerce.configuration.hyperparameter_config import HyperparameterConfigLoader +from recommerce.configuration.path_manager import PathManager +from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly +from recommerce.market.circular.circular_vendors import RuleBasedCERebuyAgentSampleCollector +from recommerce.monitoring.exampleprinter import ExamplePrinter + +if __name__ == '__main__': + config_market = HyperparameterConfigLoader.load('market_config', CircularEconomyRebuyPriceDuopoly) + exampleprinter = ExamplePrinter(config_market) + agent = RuleBasedCERebuyAgentSampleCollector(config_market, 'Sample Collector', True) + marketplace = CircularEconomyRebuyPriceDuopoly(config_market, True, document_for_regression=True) + exampleprinter.setup_exampleprinter(marketplace, agent) + for _ in tqdm(range(20)): + exampleprinter.run_example(False) + print('Saving customers dataframe...') + marketplace.customers_dataframe.to_excel(os.path.join(PathManager.results_path, 'customers_dataframe.xlsx'), index=False) + print('Saving owners dataframe...') + marketplace.owners_dataframe.to_excel(os.path.join(PathManager.results_path, 'owners_dataframe.xlsx'), index=False) + print('Saving reaction dataframe...') + marketplace.competitor_reaction_dataframe.to_excel( + os.path.join(PathManager.results_path, 'competitor_reaction_dataframe.xlsx'), index=False) diff --git a/recommerce/market/sim_market.py b/recommerce/market/sim_market.py index 7e73a48c..dc9b7b2f 100644 --- a/recommerce/market/sim_market.py +++ b/recommerce/market/sim_market.py @@ -3,6 +3,7 @@ import gym import numpy as np +import pandas as pd from attrdict import AttrDict from recommerce.configuration.json_configurable import JSONConfigurable @@ -45,7 +46,12 @@ def get_possible_rl_agents() -> list: def get_competitor_classes() -> list: raise NotImplementedError - def __init__(self, config: AttrDict, 
support_continuous_action_space: bool = False, competitors: list = None) -> None: + def __init__( + self, + config: AttrDict, + support_continuous_action_space: bool = False, + competitors: list = None, + document_for_regression: bool = False) -> None: """ Initialize a SimMarket instance. Set up needed values such as competitors and action/observation-space and reset the environment. @@ -56,6 +62,7 @@ def __init__(self, config: AttrDict, support_continuous_action_space: bool = Fal support_continuous_action_space (bool, optional): If True, the action space will be continuous. Defaults to False. competitors (list, optional): If not None, this overwrites the default competitor list with a custom one. """ + print(f'I initialize {type(self)} as market') self.config = config self.support_continuous_action_space = support_continuous_action_space self.competitors = self._get_competitor_list() if not competitors else competitors @@ -70,6 +77,33 @@ def __init__(self, config: AttrDict, support_continuous_action_space: bool = Fal 'You cannot use the mixed profit and difference reward in a monopoly market' self.reset() + self.document_for_regression = document_for_regression + if self.document_for_regression: + pandas_state_columns = [ + 'own price refurbished', + 'own price new', + 'own rebuy price', + 'competitor price refurbished', + 'competitor price new', + 'competitor rebuy price', + ] + purchases_pandas_state_columns = [ + 'buy nothing', + 'buy refurbished agent', + 'buy new agent', + 'buy refurbished competitor', + 'buy new competitor', + ] + owner_pandas_state_columns = [ + 'product holding', + 'product throw away', + 'rebuy agent', + 'rebuy competitor', + ] + self.customers_dataframe = pd.DataFrame(columns=pandas_state_columns + purchases_pandas_state_columns) + self.owners_dataframe = pd.DataFrame(columns=['in circulation'] + pandas_state_columns + owner_pandas_state_columns) + self.competitor_reaction_dataframe = pd.DataFrame(columns=pandas_state_columns) + def 
_get_number_of_vendors(self) -> int: """ Return the number of competitors plus the agent. @@ -117,11 +151,22 @@ def _simulate_customers(self, profits, number_of_customers) -> None: number_of_customers (int): the number of customers eager to buy each step. """ probability_distribution = self._customer.generate_purchase_probabilities_from_offer( - self._get_common_state_array(), self.vendor_specific_state, self.vendor_actions) + self.config, self._get_common_state_array(), self.vendor_specific_state, self.vendor_actions) assert isinstance(probability_distribution, np.ndarray), 'generate_purchase_probabilities_from_offer must return an np.ndarray' assert self._is_probability_distribution_fitting_exactly(probability_distribution) - customer_decisions = np.random.multinomial(number_of_customers, probability_distribution).tolist() + if np.abs(np.sum(probability_distribution) - 1) < 0.0001: + customer_decisions = np.random.multinomial(number_of_customers, probability_distribution).tolist() + else: + # Warning: This is not a probability distribution. This should be refactored. 
+ customer_decisions = [0] * len(probability_distribution) + for i, prediction in enumerate(probability_distribution): + customer_decisions[i] = np.ceil(prediction) if np.random.random() < prediction - np.floor(prediction) else np.floor(prediction) + customer_decisions = [int(x) for x in customer_decisions] + + if self.document_for_regression: + new_row = self._observation(1)[2:5].tolist() + self._observation(0)[2:5].tolist() + customer_decisions + self.customers_dataframe.loc[len(self.customers_dataframe)] = new_row self._output_dict['customer/buy_nothing'] += customer_decisions[0] for seller, frequency in enumerate(customer_decisions): if seller == 0 or frequency == 0: @@ -172,6 +217,10 @@ def step(self, action) -> Tuple[np.array, float, bool, dict]: f'This vendor does not deliver a suitable action, action_space: {self.action_space}, action: {action_competitor_i}' self.vendor_actions[i + 1] = action_competitor_i + if self.document_for_regression: + self.competitor_reaction_dataframe.loc[len(self.competitor_reaction_dataframe)] = \ + self._observation(1)[2:5].tolist() + self._observation(0)[2:5].tolist() + self._consider_storage_costs(profits) self._ensure_output_dict_has('profits/all', profits) diff --git a/recommerce/monitoring/exampleprinter.py b/recommerce/monitoring/exampleprinter.py index f8b5699a..e8e30a4a 100644 --- a/recommerce/monitoring/exampleprinter.py +++ b/recommerce/monitoring/exampleprinter.py @@ -3,6 +3,8 @@ import signal import sys import time +# turn all warnings into errors +import warnings import matplotlib.pyplot as plt import numpy as np @@ -21,6 +23,8 @@ from recommerce.monitoring.svg_manipulation import SVGManipulator from recommerce.rl.q_learning.q_learning_agent import QLearningAgent +warnings.filterwarnings('error') + class ExamplePrinter(): @@ -52,12 +56,31 @@ def _signal_handler(self, signum, frame) -> None: # pragma: no cover print('\nAborting exampleprinter run...') sys.exit(0) - def run_example(self, save_lineplots=False) -> int: 
+ def _rearrange_info_dicts(self, info_dicts: list, evaluation_left_bound, evaluation_right_bound) -> dict: + """ + Re-arrange the information dictionaries of the current session into a dictionary of lists. + + Args: + info_dicts (list): The information dictionaries to re-arrange. + + Returns: + dict: The re-arranged information dictionaries. + """ + flattened_dicts = [ut.flatten_dict(info) for info in info_dicts] + info_dict = {} + for key in flattened_dicts[0].keys(): + info_dict[key] = [] + for info in flattened_dicts[evaluation_left_bound:evaluation_right_bound]: + for key in info.keys(): + info_dict[key].append(info[key]) + return info_dict + + def run_example(self, save_diagrams=False, evaluation_left_bound=450, evaluation_right_bound=500) -> int: """ Run a specified marketplace with a (pre-trained, if RL) agent and record various statistics using TensorBoard. Args: - save_lineplots (bool, optional): Whether to save lineplots of the market's performance. + save_diagrams (bool, optional): Whether to save lineplots of the market's performance. Returns: int: The profit made. 
@@ -70,25 +93,28 @@ def run_example(self, save_lineplots=False) -> int: signature = f'exampleprinter_{time.strftime("%b%d_%H-%M-%S")}' writer = SummaryWriter(log_dir=os.path.join(PathManager.results_path, 'runs', signature)) - os.makedirs(os.path.join(PathManager.results_path, 'exampleprinter', signature)) + os.makedirs(os.path.join(PathManager.results_path, 'exampleprinter', signature), exist_ok=True) - if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly): + if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly) and save_diagrams: svg_manipulator = SVGManipulator(signature) cumulative_dict = None - if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_lineplots: + if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_diagrams: price_used = [[] for _ in range(self.marketplace._number_of_vendors)] price_news = [[] for _ in range(self.marketplace._number_of_vendors)] price_rebuy = [[] for _ in range(self.marketplace._number_of_vendors)] in_storages = [[] for _ in range(self.marketplace._number_of_vendors)] in_circulations = [] + info_dicts = [] + with torch.no_grad(): while not is_done: action = self.agent.policy(state) - print(state) - print(action) + # print(state) + # print(action) state, reward, is_done, logdict = self.marketplace.step(action) + info_dicts.append(logdict) if cumulative_dict is not None: cumulative_dict = ut.add_content_of_two_dicts(cumulative_dict, logdict) else: @@ -96,33 +122,35 @@ def run_example(self, save_lineplots=False) -> int: ut.write_dict_to_tensorboard(writer, logdict, counter) ut.write_dict_to_tensorboard(writer, cumulative_dict, counter, is_cumulative=True, episode_length=self.config_market.episode_length) - if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly): + if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly) and save_diagrams: 
ut.write_content_of_dict_to_overview_svg(svg_manipulator, counter, logdict, cumulative_dict, self.config_market) our_profit += reward counter += 1 - if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_lineplots: + if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_diagrams: for i in range(self.marketplace._number_of_vendors): price_used[i].append(logdict['actions/price_refurbished'][f'vendor_{i}']) price_news[i].append(logdict['actions/price_new'][f'vendor_{i}']) price_rebuy[i].append(logdict['actions/price_rebuy'][f'vendor_{i}']) in_storages[i].append(logdict['state/in_storage'][f'vendor_{i}']) in_circulations.append(logdict['state/in_circulation']) - if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly): + if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly) and save_diagrams: svg_manipulator.save_overview_svg(filename=('MarketOverview_%.3d' % counter)) - if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly): + if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPriceDuopoly) and save_diagrams: svg_manipulator.to_html() - if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_lineplots: - self.save_step_diagrams(price_used, price_news, price_rebuy, in_storages, in_circulations, signature) + if isinstance(self.marketplace, circular_market.CircularEconomyRebuyPrice) and save_diagrams: + self.save_step_diagrams(price_used, price_news, price_rebuy, in_storages, in_circulations, signature, + evaluation_left_bound, evaluation_right_bound) - return our_profit + return our_profit, self._rearrange_info_dicts(info_dicts, evaluation_left_bound, evaluation_right_bound) - def save_step_diagrams(self, price_used, price_news, price_rebuy, in_storages, in_circulations, signature) -> None: + def save_step_diagrams(self, price_used, price_news, price_rebuy, in_storages, in_circulations, 
signature, + evaluation_left_bound, evaluation_right_bound) -> None: x = np.array(range(1, self.config_market.episode_length + 1)) plt.step(x, in_circulations) plt.savefig(os.path.join(PathManager.results_path, 'exampleprinter', signature, 'lineplot_in_circulations.svg')) - plt.xlim(450, 475) + plt.xlim(evaluation_left_bound, evaluation_right_bound) plt.savefig(os.path.join(PathManager.results_path, 'exampleprinter', signature, 'lineplot_in_circulations_xlim.svg'), transparent=True) plt.clf() for data, name in [(price_used, 'price_refurbished'), (price_news, 'price_new'), @@ -138,7 +166,7 @@ def save_step_diagrams(self, price_used, price_news, price_rebuy, in_storages, i elif 'in_storage' in name: plt.ylim(0, 100) plt.savefig(os.path.join(PathManager.results_path, 'exampleprinter', signature, f'lineplot_{name}.svg'), transparent=True) - plt.xlim(450, 475) + plt.xlim(evaluation_left_bound, evaluation_right_bound) plt.savefig(os.path.join(PathManager.results_path, 'exampleprinter', signature, f'lineplot_{name}_xlim.svg'), transparent=True) plt.clf() diff --git a/recommerce/rl/ablation_study.py b/recommerce/rl/ablation_study.py new file mode 100644 index 00000000..a03ee952 --- /dev/null +++ b/recommerce/rl/ablation_study.py @@ -0,0 +1,166 @@ +# This is the script describing the ablation study for the paper. +# It does not contain new framework features, but it stays in the repo to keep the experiments reproducible. 
import os
import time
from multiprocessing import Pipe, Process

import numpy as np
import pandas as pd

from recommerce.configuration.hyperparameter_config import HyperparameterConfigLoader
from recommerce.configuration.path_manager import PathManager
# CircularEconomyRebuyPriceDuopolyFitted is only referenced by the commented-out comparison at the end of this file.
from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly, CircularEconomyRebuyPriceDuopolyFitted, CircularEconomyRebuyPriceOligopoly
from recommerce.monitoring.exampleprinter import ExamplePrinter
from recommerce.rl.stable_baselines.sb_ppo import StableBaselinesPPO

# Added to every sales total so that an evaluation window without a single sale yields a price of 0
# instead of a division by zero. Importing the exampleprinter turns all warnings into errors,
# so an unguarded 0/0 would raise instead of producing NaN.
_NO_SALES_EPSILON = 1e-10


def _sales_weighted_price(info_sequence, price_key, sales_key):
    """
    Return the average price that was actually paid, i.e. the offered prices weighted by the units sold.

    Args:
        info_sequence (dict): Maps flattened info keys to per-step lists of values.
        price_key (str): Key of the per-step offer prices.
        sales_key (str): Key of the per-step number of units sold (or rebought) at that price.

    Returns:
        float: The sales-weighted price, 0 if nothing was sold.
    """
    prices = np.array(info_sequence[price_key])
    sales = np.array(info_sequence[sales_key])
    return np.sum(prices * sales) / (np.sum(sales) + _NO_SALES_EPSILON)


def _vendor_parameters(vendor, suffix):
    """
    Build the (column name, aggregation function) pairs describing a single vendor.

    Args:
        vendor (str): The vendor part of the info keys, e.g. 'vendor_0'.
        suffix (str): Appended to every column name, e.g. ' competitor'.

    Returns:
        list: Tuples of column name and a function mapping an info sequence to the column value.
    """
    def mean_of(key):
        return lambda info_sequence: np.mean(info_sequence[f'{key}/{vendor}'])

    def sales_price(price_key, sales_key):
        return lambda info_sequence: _sales_weighted_price(info_sequence, f'{price_key}/{vendor}', f'{sales_key}/{vendor}')

    return [
        (f'profit{suffix}', mean_of('profits/all')),
        (f'new sales{suffix}', mean_of('customer/purchases_new')),
        (f'refurbished sales{suffix}', mean_of('customer/purchases_refurbished')),
        (f'rebuys{suffix}', mean_of('owner/rebuys')),
        (f'offer price new{suffix}', mean_of('actions/price_new')),
        (f'offer price refurbished{suffix}', mean_of('actions/price_refurbished')),
        (f'offer price rebuy{suffix}', mean_of('actions/price_rebuy')),
        (f'sales price new{suffix}', sales_price('actions/price_new', 'customer/purchases_new')),
        (f'sales price refurbished{suffix}', sales_price('actions/price_refurbished', 'customer/purchases_refurbished')),
        (f'sales price rebuy{suffix}', sales_price('actions/price_rebuy', 'owner/rebuys')),
        (f'inventory level{suffix}', mean_of('state/in_storage')),
    ]


def create_relevant_dataframe(descriptions, info_sequences_list):
    """
    Condense the info sequences of several runs into one row of key figures per run.

    Args:
        descriptions (list): One label per run, written to the 'market configuration' column.
        info_sequences_list (list): One info sequence per run (dict of flattened info key -> list of per-step values).

    Returns:
        pd.DataFrame: One row per run with the figures of vendor_0, of vendor_1 ('... competitor') and of the market.
    """
    parameters = (
        _vendor_parameters('vendor_0', '')
        + _vendor_parameters('vendor_1', ' competitor')
        + [
            ('resources in use', lambda info_sequence: np.mean(info_sequence['state/in_circulation'])),
            ('throw away', lambda info_sequence: np.mean(info_sequence['owner/throw_away'])),
        ]
    )
    columns = ['market configuration'] + [name for name, _ in parameters]
    rows = [
        [description] + [aggregate(info_sequences) for _, aggregate in parameters]
        for description, info_sequences in zip(descriptions, info_sequences_list)
    ]
    return pd.DataFrame(rows, columns=columns)


def run_training_session(market_class, config_market, agent_class, config_rl, training_steps, number, pipe_to_parent):
    """
    Train one agent, run it once in the exampleprinter and send the recorded info sequences to the parent process.

    Args:
        market_class: The market class used for both training and the final example run.
        config_market: The market configuration handed to market, agent and exampleprinter.
        agent_class: The agent class to train; it must offer `train_with_default_eval`.
        config_rl: The RL configuration handed to the agent.
        training_steps (int): Number of training steps.
        number: Identifier that becomes part of the agent name ('Train<number>').
        pipe_to_parent: Connection on which the info sequences of the example run are sent.
    """
    training_market = market_class(config_market, support_continuous_action_space=True)
    agent = agent_class(config_market, config_rl, training_market, name=f'Train{number}')
    agent.train_with_default_eval(training_steps)
    exampleprinter = ExamplePrinter(config_market)
    # The example run gets a fresh market instance instead of the one used for training.
    exampleprinter.setup_exampleprinter(market_class(config_market, support_continuous_action_space=True), agent)
    _, info_sequences = exampleprinter.run_example(save_diagrams=False)
    pipe_to_parent.send(info_sequences)


def _select_market_class(config_market):
    """Return the oligopoly market if the config defines 'oligopol_competitors', the duopoly market otherwise."""
    if 'oligopol_competitors' in config_market.keys():
        return CircularEconomyRebuyPriceOligopoly
    return CircularEconomyRebuyPriceDuopoly


def run_group(market_configs, market_descriptions, training_steps, target_function=run_training_session):
    """
    Run one training session per market configuration, each in its own process, and collect the results.

    Args:
        market_configs (list): The market configurations to train on.
        market_descriptions (list): One label per configuration; also passed to the session as its identifier.
        training_steps (int): Number of training steps per session.
        target_function (callable, optional): The function executed in each process. Defaults to run_training_session.

    Returns:
        pd.DataFrame: The key figures of all sessions, see create_relevant_dataframe.
    """
    rl_config = HyperparameterConfigLoader.load('sb_ppo_config', StableBaselinesPPO)
    # One one-directional pipe per session: (receiving end for the parent, sending end for the child).
    pipes = [Pipe(False) for _ in market_configs]
    print(market_configs)
    processes = [
        Process(
            target=target_function,
            args=(_select_market_class(config_market), config_market, StableBaselinesPPO, rl_config, training_steps, description, pipe_entry))
        for config_market, description, (_, pipe_entry) in zip(market_configs, market_descriptions, pipes)]
    print('Now I start the processes')
    for process in processes:
        # NOTE(review): the purpose of the 2 s stagger is not visible here; presumably it avoids
        # identical time-based run signatures or start-up contention - confirm before removing.
        time.sleep(2)
        process.start()
    print('Now I wait for the results')
    # Receive before joining so that no child is left waiting for its result to be read.
    info_sequences = [output.recv() for output, _ in pipes]
    print('Now I have the results')
    for process in processes:
        process.join()
    print('All threads joined')
    return create_relevant_dataframe(market_descriptions, info_sequences)


def get_different_market_configs(parameter_name, values):
    """
    Create one market configuration per value, each being the default configuration with one parameter overwritten.

    Args:
        parameter_name (str): The configuration key to overwrite.
        values (list): The values to use for that key.

    Returns:
        tuple: The list of configurations and the list of their descriptions ('<parameter_name>=<value>').
    """
    descriptions = [f'{parameter_name}={value}' for value in values]
    market_configs = []
    for value in values:
        market_config = HyperparameterConfigLoader.load('market_config', CircularEconomyRebuyPriceDuopoly)
        market_config[parameter_name] = value
        market_configs.append(market_config)
    return market_configs, descriptions


def main():
    """Run all experiments in groups of parallel sessions and save the key figures as Excel files."""
    # Experiment sets of earlier runs, kept for reproducibility:
    # experiments = [('price_step_size', [1.5, 1, 0.5, 0.25])]
    # experiments = [('max_storage', [20, 50, 200]),
    #                ('production_price', [2, 4]),
    #                ('number_of_customers', [10, 30]),
    #                ('storage_cost', [0.01, 0.1, 0.2]),
    #                ('compared_value_old', [0.4, 0.6]),
    #                ('upper_tolerance_old', [4.0, 6.0]),
    #                ('upper_tolerance_new', [7.0, 9.0]),
    #                ('share_interested_owners', [0.025, 0.075]),
    #                ('competitor_lowest_storage_level', [4.5, 8.5]),
    #                ('competitor_ok_storage_level', [9.5, 15.5])
    #                ]
    # NOTE(review): the default market_config.json names this setting 'storage_cost_per_product';
    # confirm that the markets actually read a 'storage_cost' key, otherwise this experiment has no effect.
    experiments = [('storage_cost', [0.01, 0.1, 0.25, 0.5]),
                   ('oligopol_competitors', [1, 2, 3, 4])]
    market_configs, descriptions = [], []
    for experiment in experiments:
        print(experiment)
        single_configs, single_descriptions = get_different_market_configs(*experiment)
        market_configs += single_configs
        descriptions += single_descriptions

    print(f'Now I start the experiments. There are {len(market_configs)} experiments in total.')
    dataframes = []
    parallel_runs = 4
    for i in range(0, len(market_configs), parallel_runs):
        print(f'Now I start the experiments {i}-{i+parallel_runs}')
        tmp_dataframe = run_group(market_configs[i:i+parallel_runs], descriptions[i:i+parallel_runs], 1000000)
        dataframes.append(tmp_dataframe)
        # Every group is saved right away so that finished groups survive a later failure.
        print(f'Saving dataframe {i}-{i+parallel_runs}')
        tmp_dataframe.to_excel(os.path.join(PathManager.results_path, f'dataframe{i}-{i+parallel_runs}.xlsx'), index=False)
    dataframe = pd.concat(dataframes)
    print('Now I have the dataframe. I save it...')
    dataframe.to_excel(os.path.join(PathManager.results_path, 'dataframe.xlsx'), index=False)
    print('Done')

    # Comparison of a trained agent on the real and on the fitted market, kept for reproducibility:
    # market_config = HyperparameterConfigLoader.load('market_config', CircularEconomyRebuyPriceDuopoly)
    # rl_config = HyperparameterConfigLoader.load('sb_ppo_config', StableBaselinesPPO)
    # load_path = os.path.join(PathManager.data_path, 'rl_model_300000_steps.zip')
    # agent = StableBaselinesPPO(market_config, rl_config, CircularEconomyRebuyPriceDuopoly(market_config, support_continuous_action_space=True), name='PPO on fitted market', load_path=load_path)
    # exampleprinter_real = ExamplePrinter(market_config)
    # exampleprinter_real.setup_exampleprinter(CircularEconomyRebuyPriceDuopoly(market_config, support_continuous_action_space=True), agent)
    # _, info_sequences = exampleprinter_real.run_example(save_diagrams=False)
    # exampleprinter_fitted = ExamplePrinter(market_config)
    # exampleprinter_fitted.setup_exampleprinter(CircularEconomyRebuyPriceDuopolyFitted(market_config, support_continuous_action_space=True), agent)
    # _, info_sequences_fitted = exampleprinter_fitted.run_example(save_diagrams=False)
    # dataframe = create_relevant_dataframe(['Values on real market', 'Values on fitted market'], [info_sequences, info_sequences_fitted])
    # dataframe.to_excel(os.path.join(PathManager.results_path, 'dataframe_fitted_vs_real.xlsx'), index=False)


if __name__ == '__main__':
    main()
--git a/recommerce/rl/callback.py b/recommerce/rl/callback.py index 44fd23da..977ad8bd 100644 --- a/recommerce/rl/callback.py +++ b/recommerce/rl/callback.py @@ -11,6 +11,7 @@ import recommerce.configuration.utils as ut from recommerce.configuration.path_manager import PathManager +from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly, CircularEconomyRebuyPriceDuopolyFitted from recommerce.market.sim_market import SimMarket from recommerce.market.vendors import RuleBasedAgent from recommerce.monitoring.agent_monitoring.am_monitoring import Monitor @@ -176,7 +177,7 @@ def _on_training_end(self) -> None: analyze_consecutive_models( self.saved_parameter_paths, monitor, - type(self.marketplace), + CircularEconomyRebuyPriceDuopoly if isinstance(self.marketplace, CircularEconomyRebuyPriceDuopolyFitted) else type(self.marketplace), self.config_market, self.agent_class, hasattr(self.model, 'env'), diff --git a/recommerce/rl/stable_baselines/stable_baselines_model.py b/recommerce/rl/stable_baselines/stable_baselines_model.py index 34408433..c6eb2a6c 100644 --- a/recommerce/rl/stable_baselines/stable_baselines_model.py +++ b/recommerce/rl/stable_baselines/stable_baselines_model.py @@ -1,13 +1,18 @@ import os +import time from abc import ABC, abstractmethod import numpy as np +import pandas as pd from attrdict import AttrDict +from stable_baselines3.common.callbacks import CheckpointCallback from recommerce.configuration.path_manager import PathManager +from recommerce.market.circular.circular_sim_market import CircularEconomyRebuyPriceDuopoly, CircularEconomyRebuyPriceDuopolyFitted from recommerce.market.circular.circular_vendors import CircularAgent from recommerce.market.linear.linear_vendors import LinearAgent from recommerce.market.sim_market import SimMarket +from recommerce.monitoring.exampleprinter import ExamplePrinter from recommerce.rl.callback import RecommerceCallback from recommerce.rl.reinforcement_learning_agent import 
ReinforcementLearningAgent @@ -53,11 +58,58 @@ def set_marketplace(self, new_marketplace: SimMarket): def train_agent(self, training_steps=100001, iteration_length=500, analyze_after_training=True): callback = RecommerceCallback( - type(self), self.marketplace, self.config_market, self.config_rl, training_steps=training_steps, iteration_length=iteration_length, - signature=self.name, analyze_after_training=analyze_after_training) + type(self), self.marketplace, self.config_market, self.config_rl, training_steps=training_steps, + iteration_length=iteration_length, signature=self.name, analyze_after_training=analyze_after_training) self.model.learn(training_steps, callback=callback) return callback.watcher + def train_with_default_eval(self, training_steps=100001): + token = time.strftime('%b%d_%H-%M-%S') + save_path = os.path.join(PathManager.results_path, f'model_files_{token}', f'{self.name}') + log_path = os.path.join(PathManager.results_path, 'logs', f'{token}') + os.makedirs(log_path, exist_ok=True) + step_size = 25000 + callback = CheckpointCallback(step_size, save_path=save_path) + self.model.learn(training_steps, callback=callback) + if self.marketplace.document_for_regression: + self.marketplace.customers_dataframe.to_excel(os.path.join(PathManager.results_path, f'customers_dataframe_{self.name}.xlsx')) + self.marketplace.owners_dataframe.to_excel(os.path.join(PathManager.results_path, f'owners_dataframe_{self.name}.xlsx')) + self.marketplace.competitor_reaction_dataframe.to_excel( + os.path.join(PathManager.results_path, f'competitor_reaction_dataframe_{self.name}.xlsx')) + + best_profit = -np.inf + profits = [] + # fitted_profits = [] + # iterate through the saved models and evaluate them by running the exampleprinter + modelfiles = sorted(os.listdir(save_path)) + for model_file in modelfiles: + print('I analyze the model: ', model_file) + agent = type(self)(self.config_market, self.config_rl, self.marketplace, load_path=os.path.join(save_path, 
model_file)) + exampleprinter = ExamplePrinter(self.config_market) + marketplace = type(self.marketplace)(self.config_market, support_continuous_action_space=True) + exampleprinter.setup_exampleprinter(marketplace, agent) + _, info_sequence = exampleprinter.run_example() + profit = np.mean(info_sequence['profits/all/vendor_0']) + profits.append(profit) + print(f'profit per step of {model_file}: {profit}') + if profit > best_profit: + best_profit = profit + best_model = model_file + + # evaluate on the fitted market + # exampleprinter_fitted = ExamplePrinter(self.config_market) + # marketplace = CircularEconomyRebuyPriceDuopolyFitted(self.config_market, support_continuous_action_space=True) + # exampleprinter_fitted.setup_exampleprinter(marketplace, agent) + # _, info_sequence = exampleprinter_fitted.run_example() + # profit = np.mean(info_sequence['profits/all/vendor_0']) + # fitted_profits.append(profit) + print(f'best model: {best_model} with profit {best_profit}') + print('Saving the results of the evaluation in the following path: ', log_path) + dataframe = pd.DataFrame.from_dict({'model': modelfiles, 'profit': profits}) + dataframe.to_excel(os.path.join(log_path, f'evaluation_{time.strftime("%b%d_%H-%M-%S")}.xlsx')) + print('Done!') + return save_path + @staticmethod def get_configurable_fields() -> list: raise NotImplementedError diff --git a/recommerce/rl/training_scenario.py b/recommerce/rl/training_scenario.py index 7d0b36ce..f99abff5 100644 --- a/recommerce/rl/training_scenario.py +++ b/recommerce/rl/training_scenario.py @@ -133,17 +133,17 @@ def train_stable_baselines_a2c(): StableBaselinesA2C( config_market=config_market, config_rl=config_rl, - marketplace=circular_market.CircularEconomyRebuyPriceDuopoly(config_market, True)).train_agent(100000) + marketplace=circular_market.CircularEconomyRebuyPriceDuopoly(config_market, True)).train_agent(500000) def train_stable_baselines_ppo(): - used_marketplace = circular_market.CircularEconomyRebuyPriceDuopoly + 
used_marketplace = circular_market.CircularEconomyRebuyPriceDuopolyFitted config_market: AttrDict = HyperparameterConfigLoader.load('market_config', used_marketplace) config_rl: AttrDict = HyperparameterConfigLoader.load('sb_ppo_config', StableBaselinesPPO) StableBaselinesPPO( config_market=config_market, config_rl=config_rl, - marketplace=used_marketplace(config_market, True)).train_agent(1000000) + marketplace=used_marketplace(config_market, True)).train_with_default_eval(1000000) def train_stable_baselines_sac(): @@ -202,4 +202,4 @@ def main(): # Make sure a valid datapath is set PathManager.manage_user_path() - main() + train_stable_baselines_ppo() diff --git a/setup.cfg b/setup.cfg index 167f033c..ddd3722d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,6 +29,8 @@ install_requires = names>=0.3.0 scipy>=1.8.0 attrdict>=2.0.1 + openpyxl>=3.1.2 + scikit-learn>=1.2.2 python_requires = >=3.8 [options.extras_require] diff --git a/sqlite.db b/sqlite.db new file mode 100644 index 00000000..6536e357 Binary files /dev/null and b/sqlite.db differ diff --git a/tests/test_customers.py b/tests/test_customers.py index 660b8d68..956d3ee5 100644 --- a/tests/test_customers.py +++ b/tests/test_customers.py @@ -16,7 +16,8 @@ # Test the Customer parent class, i.e. make sure it cannot be used def test_customer_parent_class(): with pytest.raises(NotImplementedError) as assertion_message: - customer.Customer.generate_purchase_probabilities_from_offer(CustomerLinear, *random_offer(linear_market.LinearEconomyDuopoly)) + customer.Customer.generate_purchase_probabilities_from_offer(CustomerLinear, config_market, + *random_offer(linear_market.LinearEconomyDuopoly)) assert 'This method is abstract. 
Use a subclass' in str(assertion_message.value) @@ -42,7 +43,7 @@ def test_customer_parent_class(): generate_purchase_probabilities_from_offer_testcases) def test_generate_purchase_probabilities_from_offer(customer, common_state, vendor_specific_state, vendor_actions, expected_message): with pytest.raises(AssertionError) as assertion_message: - customer.generate_purchase_probabilities_from_offer(customer, common_state, vendor_specific_state, vendor_actions) + customer.generate_purchase_probabilities_from_offer(customer, config_market, common_state, vendor_specific_state, vendor_actions) assert expected_message in str(assertion_message.value) @@ -58,7 +59,7 @@ def test_generate_purchase_probabilities_from_offer(customer, common_state, vend @pytest.mark.parametrize('customer, market', customer_action_range_testcases) def test_customer_action_range(customer, market): offers = random_offer(market) - probability_distribution = customer.generate_purchase_probabilities_from_offer(customer, *offers) + probability_distribution = customer.generate_purchase_probabilities_from_offer(customer, config_market, *offers) assert len(probability_distribution) == market(config=config_market)._get_number_of_vendors() * \ (1 if issubclass(market, linear_market.LinearEconomy) else 2) + 1 @@ -66,14 +67,14 @@ def test_customer_action_range(customer, market): def test_linear_higher_price_lower_purchase_probability(): common_state, vendor_specific_state, vendor_actions = np.array([]), [[12], [12]], [3, 5] probability_distribution = CustomerLinear.generate_purchase_probabilities_from_offer( - CustomerLinear, common_state, vendor_specific_state, vendor_actions) + CustomerLinear, config_market, common_state, vendor_specific_state, vendor_actions) assert probability_distribution[1] > probability_distribution[2] def test_linear_higher_quality_higher_purchase_probability(): common_state, vendor_specific_state, vendor_actions = np.array([]), [[13], [12]], [3, 3] probability_distribution = 
CustomerLinear.generate_purchase_probabilities_from_offer( - CustomerLinear, common_state, vendor_specific_state, vendor_actions) + CustomerLinear, config_market, common_state, vendor_specific_state, vendor_actions) assert probability_distribution[1] > probability_distribution[2] @@ -81,17 +82,17 @@ def test_equal_ratio_equal_purchase_probability(): # In the following line: [3, 1] means prices [4, 2] common_state, vendor_specific_state, vendor_actions = np.array([]), [[16], [8]], [3, 1] probability_distribution = CustomerLinear.generate_purchase_probabilities_from_offer( - CustomerLinear, common_state, vendor_specific_state, vendor_actions) + CustomerLinear, config_market, common_state, vendor_specific_state, vendor_actions) assert probability_distribution[1] == probability_distribution[2] def test_linear_lower_overall_price_lower_nothing_probability(): common_state1, vendor_specific_state1, vendor_actions1 = np.array([]), [[15], [15]], [3, 3] probability_distribution1 = CustomerLinear.generate_purchase_probabilities_from_offer( - CustomerLinear, common_state1, vendor_specific_state1, vendor_actions1) + CustomerLinear, config_market, common_state1, vendor_specific_state1, vendor_actions1) common_state2, vendor_specific_state2, vendor_actions2 = np.array([]), [[15], [15]], [4, 4] probability_distribution2 = CustomerLinear.generate_purchase_probabilities_from_offer( - CustomerLinear, common_state2, vendor_specific_state2, vendor_actions2) + CustomerLinear, config_market, common_state2, vendor_specific_state2, vendor_actions2) print(probability_distribution1) print(probability_distribution2) assert probability_distribution1[0] < probability_distribution2[0] @@ -102,7 +103,7 @@ def test_linear_lower_overall_price_lower_nothing_probability(): def test_circular_higher_price_lower_purchase_probability(): common_state, vendor_specific_state, vendor_actions = np.array([]), [[17], [23]], [[3, 6], [4, 5]] probability_distribution = 
CustomerCircular.generate_purchase_probabilities_from_offer( - CustomerCircular, common_state, vendor_specific_state, vendor_actions) + CustomerCircular, config_market, common_state, vendor_specific_state, vendor_actions) assert probability_distribution[1] > probability_distribution[3] assert probability_distribution[2] < probability_distribution[4] diff --git a/tests/test_data/configuration_files/market_config.json b/tests/test_data/configuration_files/market_config.json index 25031a42..bd83cf62 100644 --- a/tests/test_data/configuration_files/market_config.json +++ b/tests/test_data/configuration_files/market_config.json @@ -8,5 +8,12 @@ "storage_cost_per_product": 0.1, "opposite_own_state_visibility": true, "common_state_visibility": true, - "reward_mixed_profit_and_difference": false + "reward_mixed_profit_and_difference": false, + "compared_value_old": 0.55, + "upper_tolerance_old": 5.0, + "upper_tolerance_new": 8.0, + "share_interested_owners": 0.05, + "competitor_lowest_storage_level": 6.5, + "competitor_ok_storage_level": 12.5, + "price_step_size": 1.0 } diff --git a/tests/test_exampleprinter.py b/tests/test_exampleprinter.py index 8630dbd0..693bde01 100644 --- a/tests/test_exampleprinter.py +++ b/tests/test_exampleprinter.py @@ -56,7 +56,7 @@ def test_setup_exampleprinter(): def test_full_episode_rule_based(marketplace, agent): printer = ExamplePrinter(config_market=config_market) printer.setup_exampleprinter(marketplace, agent) - assert printer.run_example(True) >= -5000 + assert printer.run_example(True)[0] >= -5000 shutil.rmtree(PathManager.results_path) @@ -85,11 +85,11 @@ def test_full_episode_rl_agents(marketplace, agent_class, parameters_file, confi load_path=os.path.join(parameters_path, parameters_file)) printer = ExamplePrinter(config_market=config_market) printer.setup_exampleprinter(marketplace, agent) - assert printer.run_example(True) >= -5000 + assert printer.run_example(True)[0] >= -5000 shutil.rmtree(PathManager.results_path) 
@pytest.mark.slow def test_exampleprinter_with_tensorboard(): - assert ExamplePrinter(config_market=config_market).run_example(True) >= -5000 + assert ExamplePrinter(config_market=config_market).run_example(True)[0] >= -5000 shutil.rmtree(PathManager.results_path)