Merge branch 'release/1.2.0'
ronaldosvieira committed May 31, 2022
2 parents 582dab8 + 53f416f commit 8fe6d14
Showing 14 changed files with 1,065 additions and 52 deletions.
2 changes: 1 addition & 1 deletion CITATION.cff
@@ -19,5 +19,5 @@ license: MIT
message: "If you use this software, please cite it as below."
repository-code: "https://github.com/ronaldosvieira/gym-locm"
title: "OpenAI Gym Environments for Legends of Code and Magic"
version: "1.1.0"
version: "1.2.0"
...
11 changes: 6 additions & 5 deletions gym_locm/agents.py
@@ -286,7 +286,7 @@ def _eval_creature(creature):
return score

@staticmethod
def _eval_state(state):
def eval_state(state):
score = 0

player, enemy = state.current_player, state.opposing_player
@@ -353,7 +353,7 @@ def _brute_force_leaf(self, state, alpha):

self.leaf += 1

return best_action, -self._eval_state(state)
return best_action, -self.eval_state(state)

def _brute_force(self, state, depth, alpha):
state = state.clone()
@@ -415,7 +415,7 @@ def _run_brute_force(self, state, depth, alpha):
else:
return action, -100000

return action, self._eval_state(state)
return action, self.eval_state(state)

def act(self, state, time_limit_ms=1000):
self.leaf = 0
@@ -1129,8 +1129,9 @@ def act(self, state):


class RLBattleAgent(Agent):
def __init__(self, model):
def __init__(self, model, deterministic=False):
self.model = model
self.deterministic = deterministic

self.hidden_states = None
self.dones = None
@@ -1145,7 +1146,7 @@ def reset(self):
def act(self, state, action_masks):
action, self.hidden_states = \
self.model.predict(state, state=self.hidden_states,
mask=self.dones, deterministic=True,
deterministic=self.deterministic,
action_masks=action_masks)

return action
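
Since eval_state is now a public @staticmethod, external callers (such as the Coac-based reward function added later in this commit) can score a state without instantiating the agent. A minimal sketch, assuming the battle environment from this commit is available; the seed value is illustrative:

from gym_locm.agents import CoacBattleAgent
from gym_locm.envs.battle import LOCMBattleEnv

# The battle environment's constructor plays through the draft phase,
# so env.state is already an in-battle State afterwards.
env = LOCMBattleEnv(seed=42)

# eval_state is a @staticmethod: no CoacBattleAgent instance is needed.
score = CoacBattleAgent.eval_state(env.state)

# Positive scores favor the current player; the Coac reward function added
# below clamps score / 2000 into [-1, 1].
print(score)
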
6 changes: 5 additions & 1 deletion gym_locm/engine.py
@@ -922,7 +922,11 @@ def clone(self) -> 'State':
cloned_state = State.empty_copy()

cloned_state.np_random = np.random.RandomState()
cloned_state.np_random.set_state(self.np_random.get_state())

try:
cloned_state.np_random.set_state(self.np_random.get_state())
except ValueError:
pass

cloned_state.instance_counter = self.instance_counter
cloned_state.summon_counter = self.summon_counter
14 changes: 13 additions & 1 deletion gym_locm/envs/base_env.py
@@ -6,18 +6,29 @@
from prettytable import PrettyTable

from gym_locm.engine import Creature, GreenItem, RedItem, BlueItem, State, Phase, ActionType, Action, Lane
from gym_locm.envs.rewards import parse_reward
from gym_locm.exceptions import MalformedActionError


class LOCMEnv(gym.Env, ABC):
card_types = {Creature: 0, GreenItem: 1, RedItem: 2, BlueItem: 3}

def __init__(self, seed=None, items=True, k=3, n=30):
def __init__(self, seed=None, items=True, k=3, n=30, reward_functions=('win-loss',), reward_weights=(1.0,)):
self._seed = seed
self.episodes = 0
self.items = items
self.k, self.n = k, n

assert len(reward_functions) == len(reward_weights), \
"The length of reward_functions and reward_weights must be the same"

self.reward_functions = tuple([parse_reward(function_name)() for function_name in reward_functions])
self.reward_weights = reward_weights

self.last_player_rewards = [None, None]

self.reward_range = (-sum(reward_weights), sum(reward_weights))

self.state = State(seed=seed, items=items, k=k, n=n)

def seed(self, seed=None):
@@ -46,6 +57,7 @@ def reset(self):
self.state = State(seed=self._seed, items=self.items)

self.episodes += 1
self.last_player_rewards = [None, None]

def render(self, mode: str = 'text'):
"""Builds a representation of the current state."""
31 changes: 21 additions & 10 deletions gym_locm/envs/battle.py
@@ -12,8 +12,10 @@ class LOCMBattleEnv(LOCMEnv):

def __init__(self,
draft_agents=(RandomDraftAgent(), RandomDraftAgent()),
return_action_mask=False, seed=None, items=True, k=3, n=30):
super().__init__(seed=seed, items=items, k=k, n=n)
return_action_mask=False, seed=None, items=True, k=3, n=30,
reward_functions=('win-loss',), reward_weights=(1.0,)):
super().__init__(seed=seed, items=items, k=k, n=n,
reward_functions=reward_functions, reward_weights=reward_weights)

self.rewards = [0.0]

@@ -49,8 +51,6 @@ def __init__(self,
# 41 possible actions
self.action_space = gym.spaces.Discrete(41)

self.reward_range = (-1, 1)

# play through draft
while self.state.phase == Phase.DRAFT:
for agent in self.draft_agents:
@@ -79,29 +79,40 @@ def step(self, action):
# less property accesses
state = self.state

self.last_player_rewards[state.current_player.id] = \
[weight * function.calculate(state, for_player=PlayerOrder.FIRST)
for function, weight in zip(self.reward_functions, self.reward_weights)]

# execute the action
if action is not None:
state.act(action)
else:
state.was_last_action_invalid = True

reward_before = self.last_player_rewards[state.current_player.id]
reward_after = [weight * function.calculate(state, for_player=PlayerOrder.FIRST)
for function, weight in zip(self.reward_functions, self.reward_weights)]

# build return info
winner = state.winner

reward = 0
if reward_before is None:
raw_rewards = (0.0,) * len(self.reward_functions)
else:
raw_rewards = tuple([after - before for before, after in zip(reward_before, reward_after)])

reward = sum(raw_rewards)
done = winner is not None
info = {'phase': state.phase,
'turn': state.turn,
'winner': winner,
'invalid': state.was_last_action_invalid}
'invalid': state.was_last_action_invalid,
'raw_rewards': raw_rewards}

if self.return_action_mask:
info['action_mask'] = self.state.action_mask

if winner is not None:
reward = 1 if winner == PlayerOrder.FIRST else -1

self.rewards[-1] += reward
self.rewards[-1] += reward

return self.encode_state(), reward, done, info

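
In effect, the battle reward introduced above is the change in each weighted reward function between the state before and after the acting player's move, summed into a scalar and also reported per-function through info['raw_rewards']. A standalone sketch of that computation, under illustrative names:

def shaped_reward(reward_functions, reward_weights, state_before, state_after, for_player):
    # Evaluate each weighted reward function on both states...
    before = [w * f.calculate(state_before, for_player=for_player)
              for f, w in zip(reward_functions, reward_weights)]
    after = [w * f.calculate(state_after, for_player=for_player)
             for f, w in zip(reward_functions, reward_weights)]
    # ...and reward the per-function differences, summed into a scalar.
    raw_rewards = tuple(a - b for b, a in zip(before, after))
    return sum(raw_rewards), raw_rewards
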
31 changes: 27 additions & 4 deletions gym_locm/envs/draft.py
@@ -14,8 +14,10 @@ def __init__(self,
battle_agents=(RandomBattleAgent(), RandomBattleAgent()),
use_draft_history=False, use_mana_curve=False,
sort_cards=False, evaluation_battles=1,
seed=None, items=True, k=3, n=30):
super().__init__(seed=seed, items=items, k=k, n=n)
seed=None, items=True, k=3, n=30,
reward_functions=('win-loss',), reward_weights=(1.0,)):
super().__init__(seed=seed, items=items, k=k, n=n,
reward_functions=reward_functions, reward_weights=reward_weights)

# init bookkeeping structures
self.results = []
@@ -93,6 +95,11 @@ def step(self, action: Union[int, Action]) -> (np.array, int, bool, dict):

# less property accesses
state = self.state
current_player_id = state.current_player.id

self.last_player_rewards[state.current_player.id] = \
[weight * function.calculate(state, for_player=current_player_id)
for function, weight in zip(self.reward_functions, self.reward_weights)]

# find appropriate value for the provided card index
if 0 <= action.origin < self.k:
@@ -107,8 +114,11 @@ def step(self, action: Union[int, Action]) -> (np.array, int, bool, dict):
# execute the action
state.act(action)

reward_before = self.last_player_rewards[state.current_player.id]
reward_after = [weight * function.calculate(state, for_player=current_player_id)
for function, weight in zip(self.reward_functions, self.reward_weights)]

# init return info
reward = 0
done = False
info = {'phase': state.phase,
'turn': state.turn,
@@ -134,11 +144,24 @@ def step(self, action: Union[int, Action]) -> (np.array, int, bool, dict):
self.results.append(1 if winner == PlayerOrder.FIRST else -1)
info['winner'].append(winner)

reward = np.mean(self.results)
try:
win_loss_reward_index = self.reward_functions.index("win-loss")
reward_after[win_loss_reward_index] = np.mean(self.results)
except ValueError:
pass

done = True

del info['turn']

if reward_before is None:
raw_rewards = (0.0,) * len(self.reward_functions)
else:
raw_rewards = tuple([after - before for before, after in zip(reward_before, reward_after)])

info['raw_rewards'] = raw_rewards
reward = sum(raw_rewards)

return self.encode_state(), reward, done, info

def do_match(self, state):
Expand Down
61 changes: 61 additions & 0 deletions gym_locm/envs/rewards.py
@@ -0,0 +1,61 @@
from abc import ABC, abstractmethod

from gym_locm.agents import CoacBattleAgent
from gym_locm.engine import State, PlayerOrder


class RewardFunction(ABC):
@abstractmethod
def calculate(self, state: State, for_player: PlayerOrder = PlayerOrder.FIRST):
pass


class WinLossRewardFunction(RewardFunction):
def calculate(self, state: State, for_player: PlayerOrder = PlayerOrder.FIRST):
if state.winner == for_player:
return 1
elif state.winner == for_player.opposing():
return -1
else:
return 0


class PlayerHealthRewardFunction(RewardFunction):
def calculate(self, state: State, for_player: PlayerOrder = PlayerOrder.FIRST):
return state.players[for_player].health / 30


class OpponentHealthRewardFunction(RewardFunction):
def calculate(self, state: State, for_player: PlayerOrder = PlayerOrder.FIRST):
return -max(0, state.players[for_player.opposing()].health) / 30


class PlayerBoardPresenceRewardFunction(RewardFunction):
def calculate(self, state: State, for_player: PlayerOrder = PlayerOrder.FIRST):
return sum(creature.attack for lane in state.players[for_player].lanes for creature in lane)


class OpponentBoardPresenceRewardFunction(RewardFunction):
def calculate(self, state: State, for_player: PlayerOrder = PlayerOrder.FIRST):
return -sum(creature.attack for lane in state.players[for_player.opposing()].lanes for creature in lane)


class CoacRewardFunction(RewardFunction):
def calculate(self, state: State, for_player: PlayerOrder = PlayerOrder.FIRST):
signal = 1 if state.current_player.id == for_player else -1

return min(1, max(-1, signal * CoacBattleAgent.eval_state(state) / 2000))


available_rewards = {
"win-loss": WinLossRewardFunction,
"player-health": PlayerHealthRewardFunction,
"opponent-health": OpponentHealthRewardFunction,
"player-board-presence": PlayerBoardPresenceRewardFunction,
"opponent-board-presence": OpponentBoardPresenceRewardFunction,
"coac": CoacRewardFunction
}


def parse_reward(reward_name: str):
return available_rewards[reward_name.lower().replace(" ", "-")]
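
For reference, a hedged usage sketch of the new reward plumbing from the environment side; the reward functions and weights below are illustrative choices, not defaults prescribed by this commit:

from gym_locm.envs.battle import LOCMBattleEnv
from gym_locm.envs.rewards import parse_reward

# Combine the sparse terminal win-loss signal with a dense opponent-health term.
env = LOCMBattleEnv(reward_functions=('win-loss', 'opponent-health'),
                    reward_weights=(1.0, 0.1))

# reward_range follows directly from the weights: (-sum(weights), sum(weights)).
print(env.reward_range)  # (-1.1, 1.1)

# parse_reward normalizes the name (lower-case, spaces to hyphens) before lookup.
assert parse_reward("Win Loss") is parse_reward("win-loss")
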
2 changes: 1 addition & 1 deletion gym_locm/experiments/hyp-search.py
@@ -8,7 +8,7 @@
from hyperopt.pyll import scope

from gym_locm.agents import MaxAttackBattleAgent, GreedyBattleAgent, MaxAttackDraftAgent
from gym_locm.toolbox.trainer import AsymmetricSelfPlay, model_builder_mlp, model_builder_lstm
from gym_locm.toolbox.trainer_draft import AsymmetricSelfPlay, model_builder_mlp, model_builder_lstm

hyperparameter_space = {
'switch_freq': hp.choice('switch_freq', [10, 100, 1000]),
19 changes: 0 additions & 19 deletions gym_locm/experiments/training-battle.py

This file was deleted.
