Skip to content

Commit 90835ae

Browse files
add all files for project 3 Multi_Agent Collaboration and Competition
1 parent 7344b04 commit 90835ae

File tree

118 files changed

+27047
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

118 files changed

+27047
-0
lines changed

Project-3_Collaboration_and_Competition/Collaboration_and_Competition/.ipynb_checkpoints/Competitive_Multi-Agent-checkpoint.ipynb

Lines changed: 473 additions & 0 deletions
Large diffs are not rendered by default.

Project-3_Collaboration_and_Competition/Collaboration_and_Competition/.ipynb_checkpoints/Tennis-checkpoint.ipynb

Lines changed: 461 additions & 0 deletions
Large diffs are not rendered by default.

Project-3_Collaboration_and_Competition/Collaboration_and_Competition/Competitive_Multi-Agent.ipynb

Lines changed: 461 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
from ddpg_agent import Agent
2+
from collections import deque,namedtuple
3+
import random
4+
import numpy as np
5+
import torch
6+
7+
8+
# Hyperparameters used by the multi-agent wrapper and its replay buffer.
BATCH_SIZE = 256        # number of experiences drawn per learning update
BUFFER_SIZE = int(1e6)  # maximum number of experiences kept in replay memory
LEARN_NUMBER = 4        # learning updates each agent runs per call to step()
GAMMA = 0.99            # discount factor

# Run tensors on the first CUDA device when one is available, else on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
14+
15+
16+
class maddpg():
    """Group of DDPG agents that share a single replay buffer.

    Each agent is an ``Agent`` instance built with its own seed. Experiences
    from every agent are stored in one common ``ReplayBuffer``, and each agent
    learns from batches sampled out of that shared memory.
    """

    def __init__(self, state_size, action_size, num_agents, random_seeds):
        """Create the agents and the replay buffer they share.

        Params
        ======
            state_size: forwarded unchanged to every ``Agent``
            action_size: forwarded to every ``Agent`` and to the buffer
            num_agents (int): number of agents to create
            random_seeds (sequence): indexed ``0 .. num_agents - 1``; entry
                ``i`` is the seed handed to agent ``i``
        """
        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.random_seeds = random_seeds
        self.agents = [
            Agent(self.state_size, self.action_size, random_seeds[i])
            for i in range(self.num_agents)
        ]
        # The buffer uses a fixed seed, independent of the per-agent seeds.
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed=7)

    def act(self, states, add_noise=True):
        """Return one action per agent, in agent order.

        ``states`` is paired element-wise with the agents; ``add_noise`` is
        passed positionally to each agent's ``act``.
        """
        return [
            agent.act(state, add_noise)
            for agent, state in zip(self.agents, states)
        ]

    def reset(self):
        """Call ``reset()`` on every agent."""
        for agent in self.agents:
            agent.reset()

    def step(self, states, actions, rewards, next_states, dones):
        """Save experience in replay memory, and use random samples from it to learn.

        The arguments are parallel per-agent sequences; one experience tuple
        per agent is appended to the shared buffer.
        """
        for experience in zip(states, actions, rewards, next_states, dones):
            self.memory.add(*experience)

        # Learn only once the buffer holds more than one batch. The threshold
        # is read from the buffer so it always matches what sample() draws.
        if len(self.memory) > self.memory.batch_size:
            for agent in self.agents:
                # A fresh batch is sampled for every update of every agent.
                for _ in range(LEARN_NUMBER):
                    agent.learn(self.memory.sample())
50+
51+
52+
class ReplayBuffer:
    """Fixed-size buffer to store experience tuples."""

    def __init__(self, action_size, buffer_size, batch_size, seed):
        """Initialize a ReplayBuffer object.

        Params
        ======
            action_size: stored on the instance; not otherwise used here
            buffer_size (int): maximum size of buffer
            batch_size (int): size of each training batch
            seed: value used to seed Python's global ``random`` module
        """
        self.action_size = action_size
        # A bounded deque silently drops the oldest experience once full.
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"],
        )
        # random.seed() returns None, so assigning its result would always
        # leave self.seed as None. Seed the global RNG (which sample() draws
        # from) and keep the seed value itself for reference.
        random.seed(seed)
        self.seed = seed

    def add(self, state, action, reward, next_state, done):
        """Add a new experience to memory."""
        self.memory.append(
            self.experience(state, action, reward, next_state, done)
        )

    def sample(self):
        """Randomly sample a batch of experiences from memory.

        Returns a tuple ``(states, actions, rewards, next_states, dones)`` of
        float tensors on ``device``; each field is stacked row-wise with
        ``np.vstack``. ``random.sample`` raises ``ValueError`` when the buffer
        holds fewer than ``batch_size`` experiences.
        """
        batch = random.sample(self.memory, k=self.batch_size)

        def as_tensor(values, dtype=None):
            # Stack one field of the batch row-wise and move it to the device.
            stacked = np.vstack(values)
            if dtype is not None:
                stacked = stacked.astype(dtype)
            return torch.from_numpy(stacked).float().to(device)

        states = as_tensor([e.state for e in batch])
        actions = as_tensor([e.action for e in batch])
        rewards = as_tensor([e.reward for e in batch])
        next_states = as_tensor([e.next_state for e in batch])
        # Done flags go through uint8 before the float conversion.
        dones = as_tensor([e.done for e in batch], dtype=np.uint8)
        return (states, actions, rewards, next_states, dones)

    def __len__(self):
        """Return the current size of internal memory."""
        return len(self.memory)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<!--
2+
This file defines some of the browsers that Microsoft's implementation provides in
3+
<windir>\Microsoft.NET\Framework\<ver>\CONFIG\Browsers\*.browser
4+
5+
It is not derived from any file distributed with Microsoft's implementation. Since
6+
we can't distribute MS's browser files, we use browscap.ini to determine
7+
browser capabilities. Then, if and only if the application contains App_Browser/*.browser
8+
files and we are using .NET 2.0 or higher, we supplement the capabilities with the
9+
information in those files and the files in this directory. The primary goal of this file
10+
is to provide browser definitions that might be referenced in App_Browser/*.browser files.
11+
-->
12+
<browsers>
13+
<defaultBrowser id="Default">
14+
</defaultBrowser>
15+
<browser id="Default">
16+
<identification>
17+
<userAgent match="." />
18+
</identification>
19+
</browser>
20+
<browser id="IE6to9" parentID="Default">
21+
<identification>
22+
<capability name="majorver" match="^[6-9]" />
23+
<capability name="browser" match="^(IE|AOL)$" />
24+
</identification>
25+
</browser>
26+
<browser id="Opera8to9" parentID="Default">
27+
<identification>
28+
<capability name="majorver" match="^[8-9]" />
29+
<capability name="browser" match="^Opera$" />
30+
</identification>
31+
</browser>
32+
<browser id="Safari" parentID="Default">
33+
<identification>
34+
<capability name="browser" match="^Safari$" />
35+
</identification>
36+
</browser>
37+
<browser id="Mozilla" parentID="Default">
38+
<identification>
39+
<capability name="browser" match="^Mozilla" />
40+
</identification>
41+
</browser>
42+
</browsers>

0 commit comments

Comments
 (0)