diff --git a/main.py b/multi-agent.py similarity index 91% rename from main.py rename to multi-agent.py index 404a21e..37c5d53 100644 --- a/main.py +++ b/multi-agent.py @@ -14,10 +14,10 @@ random.seed(1) np.random.seed(1) T.manual_seed(1) -n_episodes = 10000 +n_episodes = 2000 game_len = 5000 -figure_file = 'plots/score.png' +figure_file = 'plots/scores_mp.png' game = Game() @@ -34,9 +34,10 @@ for i in tqdm(range(n_episodes)): game.level.__init__(reset=True) # TODO: Make game.level.reset_map() so we don't pull out and load the agent every time (There is -definitevly- a better way) for player in game.level.player_sprites: + + player.stats.exp = score_history[player.player_id][i-1] for agent in agent_list: player.agent = agent_list[player.player_id] - player.stats.exp = score_history[player.player_id][i-1] agent_list = [0 for _ in range(game.max_num_players)] @@ -55,8 +56,6 @@ for i in tqdm(range(n_episodes)): # for player in game.level.player_sprites: # for enemy in game.level.enemy_sprites: # player.stats.exp *= .95 - else: - break for player in game.level.player_sprites: if not player.is_dead(): @@ -73,10 +72,11 @@ for i in tqdm(range(n_episodes)): print("Models saved ...\n") print( - f"\nCumulative score for player {player.player_id}: {score_history[0][i]}\nAverage score for player {player.player_id}: {avg_score[player.player_id]}\nBest score for player {player.player_id}: {best_score[player.player_id]}") + f"\nCumulative score for player {player.player_id}: {score_history[0][i]}\nAverage score for player {player.player_id}: {avg_score[player.player_id]}\nBest score for player {player.player_id}: {best_score[player.player_id]}") -plt.plot(score_history[0]) +plt.plot(score_history) +plt.savefig(figure_file) game.quit() diff --git a/single-agent.py b/single-agent.py new file mode 100644 index 0000000..58c17b5 --- /dev/null +++ b/single-agent.py @@ -0,0 +1,80 @@ +import random +import torch as T +import numpy as np +import matplotlib.pyplot as plt + +from game 
import Game +from tqdm import tqdm + +from os import environ +environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1' + + +random.seed(1) +np.random.seed(1) +T.manual_seed(1) + +n_episodes = 2000 +game_len = 5000 + +figure_file = 'plots/score_sp.png' + +game = Game() + +agent_list = [0] + +score_history = np.zeros(shape=(game.max_num_players, n_episodes)) +best_score = np.zeros(game.max_num_players) +avg_score = np.zeros(game.max_num_players) + +for i in tqdm(range(n_episodes)): + # TODO: Make game.level.reset_map() so we don't __init__ everything all the time (such a waste) + if i != 0: + game.level.__init__(reset=True) + # TODO: Make game.level.reset_map() so we don't pull out and load the agent every time (There is -definitively- a better way) + for player in game.level.player_sprites: + + player.stats.exp = score_history[player.player_id][i-1] + player.agent = agent_list[0] + + agent_list = [0] + + for j in range(game_len): + if not game.level.done: + + game.run() + game.calc_score() + + for player in game.level.player_sprites: + if player.is_dead(): + agent_list[0] = player.agent + player.kill() + + # if (j == game_len-1 or game.level.done) and game.level.enemy_sprites != []: + # for player in game.level.player_sprites: + # for enemy in game.level.enemy_sprites: + # player.stats.exp *= .95 + + for player in game.level.player_sprites: + if not player.is_dead(): + agent_list[0] = player.agent + exp_points = player.stats.exp + score_history[player.player_id][i] = exp_points + avg_score[player.player_id] = np.mean( + score_history[player.player_id]) + if np.mean(avg_score) > np.mean(best_score): + best_score = avg_score + print("Saving models for agent...") + player.agent.save_models( + actr_chkpt="player_actor", crtc_chkpt="player_critic") + print("Models saved ...\n") + + print( + f"\nAverage score: {np.mean(avg_score)}\nBest score: {np.mean(best_score)}") + + +plt.plot(score_history) +plt.savefig(figure_file) +game.quit() + +plt.show()