import os
import random

import pandas as pd

TRAIN = True


class Environment:
    data_dir = "./Data/sandp500/individual_stocks_5yr/Train/" if TRAIN else "./Data/sandp500/individual_stocks_5yr/Test/"
    days = 30
    portfolio = {
        "shares": 0,
        "balance": 100000,
    }
    stock_i = 0
    TRANSACTION_FEE = 7

    def __init__(self):
        # Collect the CSV filenames in the top level of the data directory
        # and visit them in a random order
        self.stock_list = next(os.walk(self.data_dir))[2]
        random.shuffle(self.stock_list)
    def reset(self):
        """
        Called before an episode runs. Resets the day index and the
        portfolio, loads the next stock's data, and returns the initial
        state for reference.

        Returns
        -------
        state : pd.DataFrame of shape (self.days, 5)
            The latest stock prices for the timeframe
        """
        try:
            self.df = pd.read_csv(self.data_dir + self.stock_list[self.stock_i]).drop(["date", "Name"], axis=1).dropna()
        except IndexError:
            # Every stock has been visited; start over from the first one
            print("Reset Stock List")
            self.stock_i = 0
            self.df = pd.read_csv(self.data_dir + self.stock_list[self.stock_i]).drop(["date", "Name"], axis=1).dropna()
        self.i = 0
        state, _ = self._get_state()
        self.portfolio = {
            "shares": 0,
            "balance": 100000,
        }
        self.stock_i += 1
        return state
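    # A minimal usage sketch (illustrative only): each reset serves up the
    # next shuffled stock as a (days, 5) window of prices.
    #
    #   env = Environment()
    #   state = env.reset()    # (30, 5) DataFrame slice of price columns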
    def step(self, action: dict):
        """
        Simulates whatever action the agent takes.

        Parameters
        ----------
        action : dict(3)
            >>> {"hold": 0, "buy": 1, "sell": 0}

        Returns
        -------
        state : pd.DataFrame of shape (self.days, 5)
            Environment state after the action was made
        reward : float
            Reward from whatever action the agent took
        done : bool
            Whether the dataset for steps has been exhausted
        """
        # Carry out the action and compute its reward
        if action["buy"] != 0:
            # Buy some shares
            reward = self._buy(action["buy"])
        elif action["sell"] != 0:
            # Sell some shares
            reward = self._sell(action["sell"])
        else:
            reward = self.calc_reward()
        # Charge a flat transaction fee on any trade
        if action["sell"] != 0 or action["buy"] != 0:
            self.portfolio["balance"] -= self.TRANSACTION_FEE
        state, done = self._get_state()
        return state, reward, done
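    # Illustrative call (assumes `env.reset()` was called first): actions are
    # one-hot style dicts with the share count in the active slot.
    #
    #   state, reward, done = env.step({"hold": 0, "buy": 5, "sell": 0})   # buy 5 shares
    #   state, reward, done = env.step({"hold": 1, "buy": 0, "sell": 0})   # hold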
    def _get_state(self, move_day: bool = True):
        """
        Returns the latest state in reference to `self.i`

        Parameters
        ----------
        move_day : bool
            Whether to advance `self.i` to the next day

        Returns
        -------
        state : pd.DataFrame of shape (self.days, 5)
            The current window of prices
        done : bool
            Whether the dataset has been used up
        """
        state = self.df.iloc[self.i:self.i + self.days]
        if move_day:
            self.i += 1
        return state, self.i + self.days + 1 >= len(self.df)
    def _buy(self, n_shares: int):
        """
        Checks that the purchase fits within `self.portfolio["balance"]`,
        then conducts the trade at the next day's opening price.

        Parameters
        ----------
        n_shares : int
            Number of shares to purchase

        Returns
        -------
        reward : float
            The portfolio value after the trade, or 0 if the agent
            cannot afford the purchase
        """
        action_price = self.df.iloc[self.i + 1]["open"]
        # Check that the total cost of the purchase does not exceed
        # the agent's available balance
        if action_price * n_shares > self.portfolio["balance"]:
            return 0
        # Deduct the cost and update the portfolio
        self.portfolio["shares"] += n_shares
        self.portfolio["balance"] -= action_price * n_shares
        return self.calc_reward()
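    # Illustrative arithmetic (hypothetical prices): buying 5 shares at an
    # opening price of 100.0 moves 500.0 from balance into holdings; the
    # TRANSACTION_FEE is charged separately in `step`, not here.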
    def _sell(self, n_shares: int):
        """
        Sells as many shares as the agent asks for by editing
        `self.portfolio`, crediting the proceeds to the agent's balance.

        Parameters
        ----------
        n_shares : int
            Number of shares to sell

        Returns
        -------
        reward : float
            The portfolio value after the trade, or 0 if the agent
            does not own enough shares
        """
        # Check that the agent actually owns the shares it wants to sell
        if n_shares > self.portfolio["shares"]:
            return 0
        action_price = self.df.iloc[self.i + 1]["open"]
        # Credit the proceeds and update the portfolio
        self.portfolio["shares"] -= n_shares
        self.portfolio["balance"] += action_price * n_shares
        return self.calc_reward()
    def calc_reward(self):
        """
        Reward is the mark-to-market portfolio value: the cash balance plus
        the shares held, valued at the latest closing price.

        Returns
        -------
        reward : float
        """
        return self.portfolio["balance"] + self.portfolio["shares"] * self._get_state(move_day=False)[0].iloc[-1]["close"]
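    # Worked example (hypothetical numbers): with a balance of 99,000, 10
    # shares held, and a latest close of 105.0, the reward is
    # 99000 + 10 * 105.0 = 100050.0.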
    def net_change(self):
        # Relative change over the whole dataset; note it is measured
        # against the final close rather than the first
        return (self.df.iloc[-1]["close"] - self.df.iloc[0]["close"]) / self.df.iloc[-1]["close"]
    def get_df(self):
        """
        Returns the environment's current DataFrame

        Returns
        -------
        df : pd.DataFrame
        """
        return self.df
    @property
    def action_space(self):
        return 3

    @property
    def observation_space(self):
        return (self.days, 5)

if __name__ == "__main__":
    env = Environment()
    state = env.reset()
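    # A minimal random-agent episode (a sketch, not the original training
    # setup): the random policy below is purely illustrative, but the
    # reset/step loop is the intended use of the environment.
    done = False
    while not done:
        n_shares = random.randint(1, 10)
        action = random.choice([
            {"hold": 1, "buy": 0, "sell": 0},
            {"hold": 0, "buy": n_shares, "sell": 0},
            {"hold": 0, "buy": 0, "sell": n_shares},
        ])
        state, reward, done = env.step(action)
    print("Final portfolio value: %.2f" % env.calc_reward())
    print("Buy-and-hold net change: %.4f" % env.net_change())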