-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
125 lines (105 loc) · 3.61 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import mdp
import pandas as pd
from board import Board
depth = 3
breadth = 10
def spawn(board, dice, index, pip = 1):
try:
return board.spawn_dice(dice, index, pip)
except AssertionError as e:
print(e)
return board
def remove(board, index):
try:
return board.remove_dice(index)
except AssertionError as e:
print(e)
return board
def merge(board, src, dest, dice):
try:
return board.merge_dice(src, dest, dice)
except AssertionError as e:
print(e)
return board
def process_command(comd, board):
"""
One of the following:
- (S)pawn (D)ice (I)ndex (P)ip
- (R)emove (I)ndex
- (M)erge (S)ource (D)estination (N)ew_dice
:param comd: string delimited by spaces
:return: True if command was properly processed
"""
tokens = comd.split(' ')
if len(tokens) == 0:
print('Empty command received')
return board, True
if tokens[0].lower() == 'e':
print('Exit received')
return board, False
if tokens[0].lower() == 's':
if len(tokens) == 4:
return spawn(board, tokens[1], int(tokens[2]) - 1, int(tokens[3])), True
else:
print(f'Wrong number of args for (S)pawn: {len(tokens)}')
return board, True
if tokens[0].lower() == 'r':
if len(tokens) == 2:
return remove(board, int(tokens[1]) - 1), True
else:
print(f'Wrong number of args for (R)emove: {len(tokens)}')
return board, True
if tokens[0].lower() == 'm':
if len(tokens) == 4:
return merge(board, int(tokens[1]) - 1, int(tokens[2]) - 1, tokens[3]), True
else:
print(f'Wrong number of args for (M)erge: {len(tokens)}')
return board, True
print('Unknown command received')
return board, True
def mdp_params(board):
s_a_r_dict = board.mdp_params(depth, breadth)
state_strings = []
actions = []
trans_probs = []
for state, action_dict in s_a_r_dict.items():
state_strings.append(state)
for action, result in action_dict.items():
state_strings.append(result.state_str())
actions.append(action)
trans_probs.append([state, action, result.state_str(), 1 / Board.DECK_LIMIT])
state_strings = list(set(state_strings))
actions = list(set(actions))
return state_strings, actions, trans_probs
def run():
deck = ['c', 'j', 'o', 'g', 'm']
board = Board.new_board(deck)
print(f'Initialized empty Board with deck: {deck}')
print(board)
def reward_func(state, action, result_state):
return Board.parse_state_str(result_state, deck).dps() - Board.parse_state_str(state, deck).dps()
while True:
prompt(deck)
try:
states, actions, trans_probs = mdp_params(board)
model = mdp.MDP(states, actions, trans_probs, reward_func, 0.5)
pi_star = model.pi_stars[board.state_str()]
print(f'Optimal step with depth {depth} and breadth {breadth}: {pi_star}')
except:
print(f'No optimal step here')
command = input('Next command:\n')
board, cont = process_command(command, board)
if not cont:
print('Bye!')
break
print(board)
def prompt(deck):
print('---- COMMANDS ----')
print('(S)pawn dice index pip')
print('(R)emove index')
print('(M)erge src dest new_dice')
print('(E)xit')
print('---- DECK ----')
print(deck)
if __name__ == '__main__':
run()