Fix wrong data type for actions in DDPG, TD3, XDDPG, and XTD3 agents: cast the sampled action batch to float instead of long in `_learn`

Alwaysproblem · Jan 3, 2024 · 57071e3
1 parent 978c29f
commit 57071e3
Show file tree

Hide file tree

Showing 5 changed files with 4 additions and 4 deletions.
diff --git a/DDPG/ddpg.py b/DDPG/ddpg.py
@@ -246,7 +246,7 @@ def _learn(self, experiences):
     states = torch.from_numpy(np.vstack([e.state for e in experiences])
                               ).float().to(device)
     actions = torch.from_numpy(np.vstack([e.action for e in experiences])
-                               ).long().to(device)
+                               ).float().to(device)
     rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])
                                ).float().to(device)
     next_states = torch.from_numpy(

diff --git a/TD3/TD3Agent_100.gif b/TD3/TD3Agent_100.gif
diff --git a/TD3/td3.py b/TD3/td3.py
@@ -260,7 +260,7 @@ def _learn(self, experiences):
     states = torch.from_numpy(np.vstack([e.state for e in experiences])
                               ).float().to(device)
     actions = torch.from_numpy(np.vstack([e.action for e in experiences])
-                               ).long().to(device)
+                               ).float().to(device)
     rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])
                                ).float().to(device)
     next_states = torch.from_numpy(

diff --git a/XDDPG/xddpg.py b/XDDPG/xddpg.py
@@ -300,7 +300,7 @@ def _learn(self, experiences):
     states = torch.from_numpy(np.vstack([e.state for e in experiences])
                               ).float().to(device)
     actions = torch.from_numpy(np.vstack([e.action for e in experiences])
-                               ).long().to(device)
+                               ).float().to(device)
     rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])
                                ).float().to(device)
     next_states = torch.from_numpy(

diff --git a/XTD3/xtd3.py b/XTD3/xtd3.py
@@ -318,7 +318,7 @@ def _learn(self, experiences):
     states = torch.from_numpy(np.vstack([e.state for e in experiences])
                               ).float().to(device)
     actions = torch.from_numpy(np.vstack([e.action for e in experiences])
-                               ).long().to(device)
+                               ).float().to(device)
     rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])
                                ).float().to(device)
     next_states = torch.from_numpy(