Robot.py
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from QRobot import QRobot
class DQNModel(nn.Module):
    """Fully connected network mapping a state vector to one Q-value per action."""

    def __init__(self, input_dim, output_dim):
        super(DQNModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)
class Robot(QRobot):
    def __init__(self, maze):
        super(Robot, self).__init__(maze)
        # Custom parameters and initialization
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self.build_model().to(self.device)
        self.target_model = self.build_model().to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        self.criterion = nn.MSELoss()
        self.memory = []  # experience replay buffer for storing transitions

    def build_model(self):
        input_dim = 2   # set according to the actual state dimension
        output_dim = 4  # set according to the actual number of actions
        model = DQNModel(input_dim, output_dim)
        return model
    def train_update(self):
        state = self.sense_state()  # current state
        state = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(self.device)
        q_values = self.model(state)  # Q-values of every action in the current state

        # Greedily pick the action with the highest Q-value (no exploration step here)
        action = torch.argmax(q_values).item()
        reward = self.maze.move_robot(action)  # execute the action and collect the reward
        # NOTE: the raw action index is passed to move_robot; map it to the maze's own
        # action representation if the maze API expects something else.

        next_state = self.sense_state()  # next state
        next_state = torch.tensor(next_state, dtype=torch.float32).unsqueeze(0).to(self.device)
        next_q_values = self.target_model(next_state)  # Q-values from the target network

        # Bellman target: r + gamma * max_a' Q_target(s', a')
        max_next_q_value = torch.max(next_q_values).item()
        target_q_value = reward + self.discount_factor * max_next_q_value
        # MSELoss expects tensors, so wrap the scalar target in a tensor
        target_q_value = torch.tensor(target_q_value, dtype=torch.float32, device=self.device)

        # Current Q-value of the chosen action
        current_q_value = q_values[0][action]

        # TD loss and gradient step on the online network
        loss = self.criterion(current_q_value, target_q_value)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return action, reward
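
    # --- Optional sketch (not part of the original logic) --------------------
    # self.memory and self.target_model are created in __init__ but never used
    # together above. The two methods below are a minimal, hypothetical way to
    # wire them up, assuming transitions are appended to self.memory as
    # (state, action, reward, next_state) tuples where the states are 1-D
    # tensors. The names replay_and_sync, sync_target, and batch_size are
    # illustrative and are not part of the QRobot framework.
    def replay_and_sync(self, batch_size=32):
        import random
        if len(self.memory) < batch_size:
            return
        batch = random.sample(self.memory, batch_size)
        states = torch.stack([s for s, a, r, ns in batch]).to(self.device)
        actions = torch.tensor([a for s, a, r, ns in batch], device=self.device)
        rewards = torch.tensor([r for s, a, r, ns in batch], dtype=torch.float32, device=self.device)
        next_states = torch.stack([ns for s, a, r, ns in batch]).to(self.device)

        # Q(s, a) for the actions actually taken
        q_sa = self.model(states).gather(1, actions.unsqueeze(1)).squeeze(1)
        # Bellman targets from the (frozen) target network
        with torch.no_grad():
            targets = rewards + self.discount_factor * self.target_model(next_states).max(dim=1).values

        loss = self.criterion(q_sa, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def sync_target(self):
        # Copy the online network's weights into the target network
        self.target_model.load_state_dict(self.model.state_dict())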
    def test_update(self):
        state = self.sense_state()  # current state
        state = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(self.device)
        with torch.no_grad():
            q_values = self.model(state)  # Q-values of every action in the current state

        # Pick the greedy action
        action = torch.argmax(q_values).item()
        reward = self.maze.move_robot(action)  # execute the action and collect the reward
        return action, reward
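
# --- Standalone sanity check (illustrative only) ------------------------------
# A quick forward-pass check of DQNModel by itself, independent of the maze
# framework: a dummy 2-dimensional state should produce 4 Q-values. This is a
# minimal sketch and does not exercise QRobot or the maze.
if __name__ == "__main__":
    net = DQNModel(input_dim=2, output_dim=4)
    dummy_state = torch.tensor([[1.0, 2.0]])    # batch of one 2-D state
    q = net(dummy_state)
    print(q.shape)                              # torch.Size([1, 4])
    print(torch.argmax(q, dim=1).item())        # index of the greedy action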