diff --git a/agents/ppo/agent.py b/agents/ppo/agent.py
index a245bf8..c8aeade 100644
--- a/agents/ppo/agent.py
+++ b/agents/ppo/agent.py
@@ -102,22 +102,22 @@ class Agent:
         self.critic.optimizer.zero_grad()
         self.total_loss.backward()

-        T.nn.utils.clip_grad_norm_(
-            self.actor.parameters(), max_norm=2)
-
-        T.nn.utils.clip_grad_norm_(
-            self.critic.parameters(), max_norm=2)
-
-        # Calculate the gradient norms for both networks
-        actor_grad_norm = T.nn.utils.clip_grad_norm_(
-            self.actor.parameters(), max_norm=2)
-
-        critic_grad_norm = T.nn.utils.clip_grad_norm_(
-            self.critic.parameters(), max_norm=2)
-
-        # Log or print the gradient norms
-        print(f"Actor Gradient Norm: {actor_grad_norm}")
-        print(f"Critic Gradient Norm: {critic_grad_norm}")
+        # T.nn.utils.clip_grad_norm_(
+        #     self.actor.parameters(), max_norm=2)
+        #
+        # T.nn.utils.clip_grad_norm_(
+        #     self.critic.parameters(), max_norm=2)
+        #
+        # # Calculate the gradient norms for both networks
+        # actor_grad_norm = T.nn.utils.clip_grad_norm_(
+        #     self.actor.parameters(), max_norm=2)
+        #
+        # critic_grad_norm = T.nn.utils.clip_grad_norm_(
+        #     self.critic.parameters(), max_norm=2)
+        #
+        # # Log or print the gradient norms
+        # print(f"Actor Gradient Norm: {actor_grad_norm}")
+        # print(f"Critic Gradient Norm: {critic_grad_norm}")

         self.actor.optimizer.step()
         self.critic.optimizer.step()
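
If this clipping/logging block is ever re-enabled, note that the removed code called `clip_grad_norm_` twice per network: the first call already clips the gradients in place, so the second call (used for logging) reports the post-clip norm, which is always capped at `max_norm`. Since `clip_grad_norm_` returns the total norm computed *before* clipping, one call per network suffices for both purposes. A minimal sketch, assuming the same `T` alias for `torch` and the `actor`/`critic` attributes from `agent.py` (`clip_and_log` itself is a hypothetical helper, not part of this repo):

```python
import torch as T

def clip_and_log(actor, critic, max_norm=2.0):
    # clip_grad_norm_ clips in place and returns the total gradient norm
    # computed *before* clipping, so one call both clips and measures;
    # a second call would only see the already-capped gradients.
    actor_grad_norm = T.nn.utils.clip_grad_norm_(
        actor.parameters(), max_norm=max_norm)
    critic_grad_norm = T.nn.utils.clip_grad_norm_(
        critic.parameters(), max_norm=max_norm)

    print(f"Actor Gradient Norm: {actor_grad_norm}")
    print(f"Critic Gradient Norm: {critic_grad_norm}")
    return actor_grad_norm, critic_grad_norm
```

In the learning step above, this would be called between `self.total_loss.backward()` and the two `optimizer.step()` calls, i.e. after gradients exist but before they are applied.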