Record per-episode entropy and advantage for each agent in main.py, and size
the running-average window of the metric plots in utils/metrics.py as
int(ep_length / 10) instead of a fixed 100.

NOTE(review): np.mean(episode_entropy) and np.mean(episode_advantage) average
over every agent's row, not only the row of player.player_id; confirm that
this is intended.
NOTE(review): the leading whitespace of the hunk lines was reconstructed from
a whitespace-collapsed copy; verify it against the target files before
applying.

diff --git a/main.py b/main.py
index 551bea2..83982e0 100644
--- a/main.py
+++ b/main.py
@@ -101,6 +101,12 @@ def main():
         episode_total_loss = np.zeros(
             shape=(n_agents, learnings_per_episode))
 
+        episode_entropy = np.zeros(
+            shape=(n_agents, learnings_per_episode))
+
+        episode_advantage = np.zeros(
+            shape=(n_agents, learnings_per_episode))
+
         # Main game loop
         for step in tqdm(range(episode_length),
                          leave=False,
@@ -127,6 +133,12 @@ def main():
                     episode_total_loss[player.player_id][learn_iters % learnings_per_episode]\
                         = player.agent.total_loss
 
+                    episode_entropy[player.player_id][learn_iters % learnings_per_episode]\
+                        = player.agent.entropy
+
+                    episode_advantage[player.player_id][learn_iters % learnings_per_episode]\
+                        = player.agent.advantage
+
                     learn_iters += 1
 
 
@@ -151,6 +163,12 @@ def main():
 
                     time_alive[player.player_id][episode] = step
 
+                    entropy[player.player_id][episode] = np.mean(
+                        episode_entropy)
+
+                    advantage[player.player_id][episode] = np.mean(
+                        episode_advantage)
+
                     # Check for new best score
                     if score > best_score[player.player_id]:
                         print(f"\nEpisode:\
diff --git a/utils/metrics.py b/utils/metrics.py
index 990dfd9..d1f206e 100644
--- a/utils/metrics.py
+++ b/utils/metrics.py
@@ -3,7 +3,7 @@ import numpy as np
 import matplotlib.pyplot as plt
 
 
-def plot_learning_curve(scores, num_players, figure_path):
+def plot_learning_curve(scores, num_players, figure_path, ep_length):
     plt.figure()
 
     plt.title("Running Average - Score")
@@ -13,7 +13,7 @@ def plot_learning_curve(scores, num_players, figure_path):
     for score in scores:
         running_avg = np.zeros(len(score))
         for i in range(len(score)):
-            running_avg[i] = np.mean(score[max(0, i-100):(i+1)])
+            running_avg[i] = np.mean(score[max(0, i-int(ep_length/10)):(i+1)])
         plt.plot(running_avg)
     plt.savefig(os.path.join(figure_path, "avg_score.png"))
     plt.close()
@@ -40,7 +40,7 @@ def plot_score(scores, num_players, figure_path):
     plt.close()
 
 
-def plot_loss(nn_type, losses, num_players, figure_path):
+def plot_loss(nn_type, losses, num_players, figure_path, ep_length):
     plt.figure()
 
     plt.title(f"Running Average - {nn_type.capitalize()} Loss")
@@ -50,13 +50,13 @@ def plot_loss(nn_type, losses, num_players, figure_path):
     for loss in losses:
         running_avg = np.zeros(len(loss))
         for i in range(len(loss)):
-            running_avg[i] = np.mean(loss[max(0, i-100):(i+1)])
+            running_avg[i] = np.mean(loss[max(0, i-int(ep_length/10)):(i+1)])
         plt.plot(running_avg)
     plt.savefig(os.path.join(figure_path, f"{nn_type}_loss.png"))
     plt.close()
 
 
-def plot_parameter(name, parameter, num_players, figure_path):
+def plot_parameter(name, parameter, num_players, figure_path, ep_length):
     plt.figure()
 
     plt.title(f"Running Average - {name.capitalize()}")
@@ -66,7 +66,7 @@ def plot_parameter(name, parameter, num_players, figure_path):
     for param in parameter:
         running_avg = np.zeros(len(param))
         for i in range(len(param)):
-            running_avg[i] = np.mean(param[max(0, i-100):(i+1)])
+            running_avg[i] = np.mean(param[max(0, i-int(ep_length/10)):(i+1)])
         plt.plot(running_avg)
     plt.savefig(os.path.join(figure_path, f"{name}.png"))
     plt.close()