diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..746009c Binary files /dev/null and b/.DS_Store differ diff --git a/agents/.DS_Store b/agents/.DS_Store new file mode 100644 index 0000000..ede64cb Binary files /dev/null and b/agents/.DS_Store differ diff --git a/agents/ppo/agent.py b/agents/ppo/agent.py index ebc7bba..b7215c0 100644 --- a/agents/ppo/agent.py +++ b/agents/ppo/agent.py @@ -102,16 +102,17 @@ class Agent: self.critic.optimizer.zero_grad() self.total_loss.backward() + T.nn.utils.clip_grad_norm_( + self.actor.parameters(), max_norm=2) + T.nn.utils.clip_grad_norm_( + self.critic.parameters(), max_norm=2) + # Calculate the gradient norms for both networks actor_grad_norm = T.nn.utils.clip_grad_norm_( self.actor.parameters(), max_norm=1) critic_grad_norm = T.nn.utils.clip_grad_norm_( self.critic.parameters(), max_norm=1) - T.nn.utils.clip_grad_norm_( - self.actor.parameters(), max_norm=1) - T.nn.utils.clip_grad_norm_( - self.critic.parameters(), max_norm=1) # Log or print the gradient norms print(f"Actor Gradient Norm: {actor_grad_norm}") print(f"Critic Gradient Norm: {critic_grad_norm}") diff --git a/agents/ppo/brain.py b/agents/ppo/brain.py index aa0728c..6dc02b9 100644 --- a/agents/ppo/brain.py +++ b/agents/ppo/brain.py @@ -96,9 +96,9 @@ class CriticNetwork(nn.Module): self.critic = nn.Sequential( nn.Linear(input_dims, fc1_dims), - nn.ReLU(), + nn.LeakyReLU(), nn.Linear(fc1_dims, fc2_dims), - nn.ReLU(), + nn.LeakyReLU(), nn.Linear(fc2_dims, 1) )