-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpolicy.py
32 lines (26 loc) · 788 Bytes
/
policy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import numpy as np
# from player import state_transformation
def state_transformation(state):
    """Convert a game state into a 3-tuple usable as a Q-table index.

    Args:
        state: object exposing ``raw_sum`` and ``opponent`` attributes and a
            ``trump_count()`` method.

    Returns:
        ``(state.raw_sum + 30, state.trump_count(), state.opponent - 1)``.
    """
    # NOTE(review): the +30 and -1 offsets presumably shift the values to
    # zero-based, non-negative indices -- confirm against the state class.
    return state.raw_sum + 30, state.trump_count(), state.opponent - 1
def dealer_policy(state):
    """Fixed threshold policy: hit while the best sum is below 25.

    Args:
        state: object exposing a ``best_sum()`` method.

    Returns:
        ``"HIT"`` when ``state.best_sum() < 25``, otherwise ``"STICK"``.
    """
    return "HIT" if state.best_sum() < 25 else "STICK"
def always_hit(state):
    """Policy that ignores ``state`` and always returns ``"HIT"``."""
    return "HIT"
def always_stick(state):
    """Policy that ignores ``state`` and always returns ``"STICK"``."""
    return "STICK"
def greedy(state, q):
    """Pick the action with the larger Q-value for ``state``.

    Args:
        state: game state; converted to a table index with
            ``state_transformation``.
        q: action-value table indexed first by the transformed state and then
            by action, where position 0 is ``"HIT"`` and position 1 is
            ``"STICK"``.

    Returns:
        ``"HIT"`` when the HIT value is greater than or equal to the STICK
        value (ties go to HIT), otherwise ``"STICK"``.
    """
    # Keep the caller's ``state`` intact instead of rebinding the parameter.
    index = state_transformation(state)
    action_values = q[index]
    return "HIT" if action_values[0] >= action_values[1] else "STICK"
def epsilon_greedy(state, q, epsilon):
    """Choose between the greedy action and its alternative at random.

    Args:
        state: game state, forwarded to ``greedy``.
        q: action-value table, forwarded to ``greedy``.
        epsilon: exploration rate used to build the selection threshold.

    Returns:
        The greedy action with probability ``1 - epsilon + epsilon / 2``,
        otherwise the other of ``"HIT"`` / ``"STICK"``.
    """
    greedy_action = greedy(state, q)
    non_greedy_action = "HIT" if greedy_action == "STICK" else "STICK"

    # With two actions, exploring uniformly with probability epsilon still
    # lands on the greedy action half of the time, hence the epsilon / 2 term.
    greedy_probability = 1 - epsilon + (epsilon / 2)

    # np.random.rand() with no arguments yields a single float in [0, 1).
    if np.random.rand() < greedy_probability:
        return greedy_action
    return non_greedy_action