Big update

Vasilis Valatsos 2024-02-10 18:11:28 +01:00
parent 8d3f4506ba
commit ff8fa7d9e7
68 changed files with 450 additions and 574 deletions

.DS_Store (binary, vendored)
agents/.DS_Store (binary, vendored)
agents/ppo/.DS_Store (binary, vendored)
(two further binary files, names not shown)

@@ -1 +0,0 @@
# This is a folder with all the saved models.

(two more binary files, names not shown)

args.py (new file, 91 lines)
@@ -0,0 +1,91 @@
import argparse
from utils.hyperparams import HPARAMS
def parse_args():
parser = argparse.ArgumentParser(
prog='Pneuma',
description='A Reinforcement Learning platform made with PyGame'
)
# Define seed
parser.add_argument('--no_seed',
default=False,
action="store_true",
help="Pass this flag to run without a seed.")
parser.add_argument('--seed',
type=int,
default=1,
help="The seed for the RNG.")
# Define episodes and agents
parser.add_argument('--n_episodes',
type=int,
default=300,
help="Number of episodes.")
parser.add_argument('--ep_length',
type=int,
default=5000,
help="Length of each episode.")
parser.add_argument('--n_agents',
type=int,
default=1,
help="Number of agents.")
# Define hyperparameters
parser.add_argument('--horizon',
type=int,
default=HPARAMS["horizon"],
help="The number of steps per update")
parser.add_argument('--gamma',
type=float,
default=HPARAMS["discount_factor"],
help="The discount factor for PPO")
parser.add_argument('--entropy_coeff',
type=float,
default=HPARAMS["entropy_coeff"],
help="The entropy coefficient")
parser.add_argument('--alpha',
type=float,
default=HPARAMS["learning_rate"],
help="The learning_rate for PPO")
parser.add_argument('--policy_clip',
type=float,
default=HPARAMS["policy_clip"],
help="The policy clip for PPO")
parser.add_argument('--batch_size',
type=int,
default=HPARAMS["batch_size"],
help="The size of each batch")
parser.add_argument('--n_epochs',
type=int,
default=HPARAMS["num_epochs"],
help="The number of epochs")
parser.add_argument('--gae_lambda',
type=float,
default=HPARAMS["GAE_lambda"],
help="The lambda parameter of the GAE")
# Misc
parser.add_argument('--no_training',
default=False,
action="store_true",
help="Set flag to disable learning. Useful for viewing trained agents interact in the environment.")
parser.add_argument('--show_pg',
default=False,
action="store_true",
help="Set flag to open a PyGame window on desktop")
return parser.parse_args()
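
A minimal usage sketch (hypothetical, not part of the commit), assuming the repository root is on the import path; options not passed on the command line fall back to the HPARAMS defaults from utils/hyperparams.py:

from args import parse_args

# e.g. invoked as: python main.py --n_agents 2 --gamma 0.98
opts = parse_args()
print(opts.gamma)       # 0.98 here; otherwise HPARAMS["discount_factor"] (0.99)
print(opts.batch_size)  # HPARAMS["batch_size"] (128) unless --batch_size is given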

chkpts/run2/A0 (new binary file)
chkpts/run2/C0 (new binary file)
configs/.DS_Store (binary, vendored)

@@ -1,7 +1,7 @@
import pygame
from random import randint
from configs.system.window_config import TILESIZE
from config.system.window import TILESIZE
class MagicPlayer:

@@ -1,8 +1,8 @@
import pygame
from random import randint, choice
from configs.game.spell_config import magic_data
from configs.game.weapon_config import weapon_data
from config.game.spell_config import magic_data
from config.game.weapon_config import weapon_data
from .movement import MovementHandler
from .combat import CombatHandler

@@ -4,7 +4,7 @@ from math import sin
from utils.resource_loader import import_folder, import_assets
from configs.system.window_config import HITBOX_OFFSET
from config.system.window import HITBOX_OFFSET
class AnimationHandler:

@@ -1,8 +1,8 @@
from effects.weapon_effects import Weapon
from effects.magic_effects import MagicPlayer
from configs.game.weapon_config import weapon_data
from configs.game.spell_config import magic_data
from config.game.weapon_config import weapon_data
from config.game.spell_config import magic_data
class CombatHandler:

@@ -1,5 +1,5 @@
from configs.game.player_config import warrior_stats, mage_stats, tank_stats
from configs.game.monster_config import monster_data
from config.game.player_config import warrior_stats, mage_stats, tank_stats
from config.game.monster_config import monster_data
class StatsHandler:

@@ -1,6 +1,6 @@
import pygame
from .components.animaton import AnimationHandler
from .components.animation import AnimationHandler
from .components.stats import StatsHandler
from .components._input import InputHandler

@@ -2,16 +2,16 @@ import pygame
import numpy as np
from random import randint
from configs.game.weapon_config import weapon_data
from configs.game.spell_config import magic_data
from config.game.weapon_config import weapon_data
from config.game.spell_config import magic_data
from .components.stats import StatsHandler
from .components._input import InputHandler
from .components.animaton import AnimationHandler
from .components.animation import AnimationHandler
from effects.particle_effects import AnimationPlayer
from agents.ppo.agent import Agent
from ml.ppo.agent import Agent
class Player(pygame.sprite.Sprite):

@@ -1,257 +0,0 @@
import pygame
import numpy as np
from random import randint
from configs.game.weapon_config import weapon_data
from configs.game.spell_config import magic_data
from .components.stats import StatsHandler
from .components._input import InputHandler
from .components.animaton import AnimationHandler
from effects.particle_effects import AnimationPlayer
from agents.ppo.agent import Agent
class Player(pygame.sprite.Sprite):
def __init__(self,
player_id,
role,
position,
map_edge,
groups,
obstacle_sprites,
visible_sprites,
attack_sprites,
attackable_sprites
):
super().__init__(groups)
self.initial_position = position
self.map_edge = map_edge
self.player_id = player_id
self.distance_direction_from_enemy = None
# Sprite Setup
self.sprite_type = "player"
self.obstacle_sprites = obstacle_sprites
self.visible_sprites = visible_sprites
self.attack_sprites = attack_sprites
self.attackable_sprites = attackable_sprites
# Graphics Setup
self.animation_player = AnimationPlayer()
self.animation = AnimationHandler(self.sprite_type)
self.animation.import_assets(position)
# Input Setup
self._input = InputHandler(
self.sprite_type, self.animation_player)
# Setup Stats
self.role = role
self.stats = StatsHandler(self.sprite_type, self.role)
def setup_agent(self,
gamma,
alpha,
policy_clip,
batch_size,
n_epochs,
gae_lambda,
chkpt_dir,
entropy_coef,
no_load=False):
self.max_num_enemies = len(self.distance_direction_from_enemy)
self.get_current_state()
self.num_features = len(self.state_features)
self.agent = Agent(
input_dims=self.num_features,
n_actions=len(self._input.possible_actions),
gamma=gamma,
alpha=alpha,
policy_clip=policy_clip,
batch_size=batch_size,
n_epochs=n_epochs,
gae_lambda=gae_lambda,
entropy_coef=entropy_coef,
chkpt_dir=chkpt_dir
)
print(
f"\nAgent initialized on player {self.player_id} using {self.agent.actor.device}.")
if not no_load:
print("Attempting to load models ...")
try:
self.agent.load_models(
actr_chkpt=f"A{self.player_id}",
crtc_chkpt=f"C{self.player_id}"
)
print("Models loaded ...\n")
except FileNotFoundError:
print(
f"FileNotFound for player {self.player_id}.\
\nSkipping loading ...\n")
def get_status(self):
if self._input.movement.direction.x == 0\
and self._input.movement.direction.y == 0:
if 'idle' not in self._input.status and 'attack' not in self._input.status:
self._input.status += '_idle'
if self._input.attacking:
self._input.movement.direction.x = 0
self._input.movement.direction.y = 0
if 'attack' not in self._input.status:
if 'idle' in self._input.status:
self._input.status = self._input.status.replace(
'idle', 'attack')
else:
self._input.status += '_attack'
else:
if 'attack' in self._input.status:
self._input.status = self._input.status.replace('_attack', '')
def attack_logic(self):
if self.attack_sprites:
for attack_sprite in self.attack_sprites:
collision_sprites = pygame.sprite.spritecollide(
attack_sprite, self.attackable_sprites, False)
if collision_sprites:
for target_sprite in collision_sprites:
if target_sprite.sprite_type == 'grass':
pos = target_sprite.rect.center
offset = pygame.math.Vector2(0, 75)
for leaf in range(randint(3, 6)):
self.animation_player.create_grass_particles(
position=pos - offset,
groups=[self.visible_sprites])
target_sprite.kill()
else:
target_sprite.get_damaged(
self, attack_sprite.sprite_type)
def get_full_weapon_damage(self):
base_damage = self.stats.attack
weapon_damage = weapon_data[self._input.combat.weapon]['damage']
return (base_damage + weapon_damage)
def get_full_magic_damage(self):
base_damage = self.stats.magic
spell_damage = magic_data[self._input.combat.magic]['strength']
return (base_damage + spell_damage)
def get_reward(self):
self.reward = 0
# Base reward on player exp
self.reward += self.stats.exp
print(f'Player exp added to reward: {self.stats.exp} -> {self.reward}')
# Add relative hp of player
self.reward += self.stats.health/self.stats.stats['health']
print(f"Player hp added to reward: {self.stats.health/self.stats.stats['health']} -> {self.reward}")
# Take into account distance of nearest enemy from player relative to the map length
self.reward -= self.nearest_dist/np.sqrt(np.sum(self.map_edge))
print(f'Relative distance of enemy: {self.nearest_dist/np.sqrt(np.sum(self.map_edge))} -> {self.reward}')
# Take into account nearest enemy relative health
self.reward -= self.nearest_enemy.stats.health/self.nearest_enemy.stats.monster_info['health']
print(f"Enemy hp added: {self.nearest_enemy.stats.health/self.nearest_enemy.stats.monster_info['health']} -> {self.reward}")
def get_current_state(self):
if self.distance_direction_from_enemy != []:
sorted_distances = sorted(
self.distance_direction_from_enemy, key=lambda x: x[0])
else:
sorted_distances = np.zeros(self.num_features)
self.nearest_dist, _, self.nearest_enemy = sorted_distances[0]
self.action_features = [self._input.action]
self.get_reward()
self.state_features = [
self.animation.rect.center[0]/self.map_edge[0],
self.animation.rect.center[1]/self.map_edge[1],
self._input.movement.direction.x,
self._input.movement.direction.y,
self.stats.health/self.stats.stats['health'],
self.stats.energy/self.stats.stats['energy'],
1 if 'attack' in self._input.status else 0,
]
for distance, direction, enemy in self.distance_direction_from_enemy:
self.state_features.extend([
distance/np.sqrt(np.sum(self.map_edge)),
direction[0],
direction[1],
enemy.stats.health /
enemy.stats.monster_info['health'],
enemy.stats.exp,
])
if hasattr(self, 'num_features'):
while len(self.state_features) < self.num_features:
self.state_features.append(0)
self.state_features = np.array(self.state_features)
def is_dead(self):
if self.stats.health <= 0:
self.stats.health = 0
self.animation.import_assets((3264, 448))
return True
else:
return False
def agent_update(self):
# Get the current state
self.get_current_state()
# Choose action based on current state
action, probs, value\
= self.agent.choose_action(self.state_features)
# Apply chosen action
self._input.check_input(action,
self.stats.speed,
self.animation.hitbox,
self.obstacle_sprites,
self.animation.rect,
self)
self.agent.remember(self.state_features, action,
probs, value, self.reward, self.is_dead())
self.get_current_state()
def update(self):
self.agent_update()
# Cooldowns and Regen
self.stats.health_recovery()
self.stats.energy_recovery()
# Refresh player based on input and animate
self.get_status()
self.animation.animate(
self._input.status, self._input.combat.vulnerable)
self._input.cooldowns(self._input.combat.vulnerable)

@@ -1,6 +1,6 @@
import pygame
from configs.system.window_config import TILESIZE,\
from config.system.window import TILESIZE,\
HITBOX_OFFSET

(6 binary image files removed; sizes 22, 46, 56, 26, 36, 31 KiB; names not shown)

figures/run1/actor_loss.png (new binary, 20 KiB)
figures/run1/advantage.png (new binary, 18 KiB)
figures/run1/avg_score.png (new binary, 19 KiB)
(new binary image, name not shown, 18 KiB)
figures/run1/entropy.png (new binary, 17 KiB)
figures/run3/actor_loss.png (new binary, 21 KiB)
figures/run3/advantage.png (new binary, 18 KiB)
figures/run3/avg_score.png (new binary, 21 KiB)
(new binary image, name not shown, 26 KiB)
figures/run3/entropy.png (new binary, 17 KiB)

(6 binary image files removed; sizes 21, 51, 49, 26, 36, 30 KiB; names not shown)

folder_struct.py (new file, 30 lines)
@@ -0,0 +1,30 @@
import os
def set_directories(base_path):
if not os.path.exists(base_path):
os.makedirs(base_path)
trial_dirs = [directory for directory in os.listdir(
base_path) if os.path.isdir(os.path.join(base_path, directory))]
# Parse the full numeric suffix so directories like run10 are handled correctly
trial_nums = sorted([int(directory[3:])
for directory in trial_dirs if directory.startswith("run") and directory[3:].isdigit()])
next_trial_num = trial_nums[-1] + 1 if trial_nums else 1
new_trial_path = os.path.join(base_path, f"run{next_trial_num}")
os.makedirs(new_trial_path)
return new_trial_path
def setup_dirs():
home_folder = os.path.dirname(os.path.abspath(__file__))
chkpt_path = os.path.join(home_folder, 'chkpts')
chkpt_path = set_directories(chkpt_path)
figure_path = os.path.join(home_folder, 'figures')
figure_path = set_directories(figure_path)
return chkpt_path, figure_path
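
A quick usage sketch (hypothetical, not part of the commit): setup_dirs() numbers the chkpts/ and figures/ run directories independently, each time creating the next runN after the highest one it finds.

from folder_struct import setup_dirs

chkpt_path, figure_path = setup_dirs()
print(chkpt_path)   # e.g. .../chkpts/run3 when run1 and run2 already exist
print(figure_path)  # e.g. .../figures/run1 on a fresh figures/ directory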

game.py (13 changed lines)
@@ -1,16 +1,19 @@
from configs.system.window_config import WIDTH,\
import os
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
from config.system.window import WIDTH,\
HEIGHT,\
WATER_COLOR,\
FPS
from level import Level
import pygame
import sys
import os
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
class Game:
class Pneuma:
def __init__(self, show_pg=False, n_players=1,):
print(f"Initializing Pneuma with {n_players} player(s).\

@@ -1,7 +1,7 @@
import pygame
from configs.game.weapon_config import weapon_data
from configs.game.spell_config import magic_data
from config.game.weapon_config import weapon_data
from config.game.spell_config import magic_data
from .ui_settings import UI_FONT,\
UI_FONT_SIZE,\

@@ -4,14 +4,13 @@ import numpy as np
from random import choice
from configs.system.window_config import TILESIZE
from config.system.window import TILESIZE
from utils.debug import debug
from utils.resource_loader import import_csv_layout, import_folder
from interface.ui import UI
from entities.observer import Observer
from entities.player import Player
from entities.enemy import Enemy
from entities.terrain import Terrain

main.py (new file, 185 lines)
@@ -0,0 +1,185 @@
import os
import random
import torch as T
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import args
import folder_struct
import utils.seeds as seeds
import utils.metrics as metrics
from game import Pneuma
def main():
parsed_args = args.parse_args()
if not parsed_args.no_seed:
seeds.set_seeds(parsed_args.seed)
print(f"Seed set as {parsed_args.seed}")
else:
print("No seed set")
chkpt_path, figure_path = folder_struct.setup_dirs()
n_episodes = parsed_args.n_episodes
episode_length = parsed_args.ep_length
n_agents = parsed_args.n_agents
horizon = parsed_args.horizon
no_training = parsed_args.no_training
learnings_per_episode = int(episode_length/horizon)
learn_iters = 0
show_pygame = parsed_args.show_pg
# Setup AI metrics
# Setup parameter monitoring
score_history = np.zeros(
shape=(parsed_args.n_agents, parsed_args.n_episodes))
best_score = np.zeros(parsed_args.n_agents)
actor_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
critic_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
total_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
entropy = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
advantage = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
# score_history, best_score, actor_loss, critic_loss, total_loss, entropy, advantage = metrics.generate(parsed_args)
game = Pneuma(show_pg=show_pygame, n_players=parsed_args.n_agents)
print("Initializing agents ...")
for player in tqdm(game.level.player_sprites,
dynamic_ncols=True):
player.setup_agent(
gamma=parsed_args.gamma,
alpha=parsed_args.alpha,
policy_clip=parsed_args.policy_clip,
batch_size=parsed_args.batch_size,
n_epochs=parsed_args.n_epochs,
gae_lambda=parsed_args.gae_lambda,
entropy_coef=parsed_args.entropy_coeff,
chkpt_dir=chkpt_path,
no_load=True
)
# Episodes start
for episode in tqdm(range(n_episodes),
dynamic_ncols=True):
game.level.reset()
episode_reward = np.zeros(
shape=(n_agents, episode_length))
episode_actor_loss = np.zeros(
shape=(n_agents, learnings_per_episode))
episode_critic_loss = np.zeros(
shape=(n_agents, learnings_per_episode))
episode_total_loss = np.zeros(
shape=(n_agents, learnings_per_episode))
# Main game loop
for step in tqdm(range(episode_length),
leave=False,
ascii=True,
dynamic_ncols=True):
if not game.level.done:
game.run()
for player in game.level.player_sprites:
episode_reward[player.player_id][step] = player.reward
if not no_training and ((step % horizon == 0 and step != 0) or player.is_dead()):
player.agent.learn()
episode_actor_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.actor_loss
episode_critic_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.critic_loss
episode_total_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.total_loss
learn_iters += 1
# Gather information about the episode
for player in game.level.player_sprites:
score = np.mean(episode_reward[player.player_id])
# Update score
score_history[player.player_id][episode] = score
# Update actor/critic loss
actor_loss[player.player_id][episode] = np.mean(
episode_actor_loss)
critic_loss[player.player_id][episode] = np.mean(
episode_critic_loss)
total_loss[player.player_id][episode] = np.mean(
episode_total_loss)
# Check for new best score
if score > best_score[player.player_id]:
print(f"\nEpisode:\
{episode}\
\nNew best score for player {player.player_id}:\
{score}\
\nOld best score for player {player.player_id}: \
{best_score[player.player_id]}")
best_score[player.player_id] = score
print(f"Saving models for player {player.player_id}...")
# Save models
player.agent.save_models(
f"A{player.player_id}",
f"C{player.player_id}")
print(f"Models saved to {chkpt_path}")
metrics.plot_learning_curve(score_history, parsed_args.n_agents, figure_path)
metrics.plot_loss('actor', actor_loss, parsed_args.n_agents, figure_path)
metrics.plot_loss('critic', critic_loss, parsed_args.n_agents, figure_path)
metrics.plot_parameter('entropy', entropy, parsed_args.n_agents, figure_path)
metrics.plot_parameter('advantage', advantage, parsed_args.n_agents, figure_path)
# End of training session
print("End of episodes.\
\nExiting game...")
game.quit()
if __name__ == '__main__':
main()
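
As a worked check of the update cadence in main() (default values taken from utils/hyperparams.py; the snippet itself is not part of the commit):

episode_length, horizon = 5000, 2048
learnings_per_episode = int(episode_length / horizon)
print(learnings_per_episode)  # 2 PPO updates per agent per episode with the defaults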

pneuma.py (deleted, 293 lines)
@@ -1,293 +0,0 @@
import os
import random
import argparse
import torch as T
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from game import Game
if __name__ == "__main__":
# Create parser
parser = argparse.ArgumentParser(
prog='Pneuma',
description='A Reinforcement Learning platform made with PyGame'
)
# Add args
parser.add_argument('--no_seed',
default=False,
action="store_true",
help="Set to True to run without a seed.")
parser.add_argument('--seed',
type=int,
default=1,
help="The seed for the RNG.")
parser.add_argument('--n_episodes',
type=int,
default=300,
help="Number of episodes.")
parser.add_argument('--ep_length',
type=int,
default=5000,
help="Length of each episode.")
parser.add_argument('--n_players',
type=int,
default=1,
help="Number of players.")
parser.add_argument('--chkpt_path',
type=str,
default="agents/saved_models",
help="Save/load location for agent models.")
parser.add_argument('--figure_path',
type=str,
default="figures",
help="Save location for figures.")
parser.add_argument('--horizon',
type=int,
default=2048,
help="The number of steps per update")
parser.add_argument('--show_pg',
default=False,
action="store_true",
help="Set to True to open PyGame window on desktop")
parser.add_argument('--no_load',
default=False,
action="store_true",
help="Set to True to ignore saved models")
parser.add_argument('--gamma',
type=float,
default=0.99,
help="The gamma parameter for PPO")
parser.add_argument('--entropy',
type=float,
default=0.01,
help="The entropy coefficient")
parser.add_argument('--alpha',
type=float,
default=0.0003,
help="The alpha parameter for PPO")
parser.add_argument('--policy_clip',
type=float,
default=0.1,
help="The policy clip")
parser.add_argument('--batch_size',
type=int,
default=128,
help="The size of each batch")
parser.add_argument('--n_epochs',
type=int,
default=20,
help="The number of epochs")
parser.add_argument('--gae_lambda',
type=float,
default=0.95,
help="The lambda parameter of the GAE")
parser.add_argument('--no_training',
default=False,
action="store_true",
help="Decides if the algorithm should train.")
args = parser.parse_args()
random.seed(args.seed)
np.random.seed(args.seed)
T.manual_seed(args.seed)
n_episodes = args.n_episodes
episode_length = args.ep_length
n_players = args.n_players
home_folder = os.path.dirname(os.path.abspath(__file__))
chkpt_path = os.path.join(home_folder, args.chkpt_path)
figure_path = os.path.join(home_folder, args.figure_path)
horizon = args.horizon
no_training = args.no_training
learnings_per_episode = int(episode_length/horizon)
learn_iters = 0
show_pygame = args.show_pg
# Setup AI stuff
score_history = np.zeros(shape=(n_players, n_episodes))
best_score = np.zeros(n_players)
actor_loss = np.zeros(shape=(n_players,
n_episodes))
critic_loss = np.zeros(shape=(n_players,
n_episodes))
total_loss = np.zeros(shape=(n_players,
n_episodes))
game = Game(show_pg=show_pygame, n_players=n_players)
print("Initializing agents ...")
for player in tqdm(game.level.player_sprites,
dynamic_ncols=True):
player.setup_agent(
gamma=args.gamma,
alpha=args.alpha,
policy_clip=args.policy_clip,
batch_size=args.batch_size,
n_epochs=args.n_epochs,
gae_lambda=args.gae_lambda,
entropy_coef=args.entropy,
chkpt_dir=chkpt_path,
no_load=args.no_load
)
# Episodes start
for episode in tqdm(range(n_episodes),
dynamic_ncols=True):
game.level.reset()
episode_reward = np.zeros(
shape=(n_players, episode_length))
episode_actor_loss = np.zeros(
shape=(n_players, learnings_per_episode))
episode_critic_loss = np.zeros(
shape=(n_players, learnings_per_episode))
episode_total_loss = np.zeros(
shape=(n_players, learnings_per_episode))
# Main game loop
for step in tqdm(range(episode_length),
leave=False,
ascii=True,
dynamic_ncols=True):
if not game.level.done:
game.run()
for player in game.level.player_sprites:
episode_reward[player.player_id][step] = player.reward
if not no_training and ((step % horizon == 0 and step != 0) or player.is_dead()):
player.agent.learn()
episode_actor_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.actor_loss
episode_critic_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.critic_loss
episode_total_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.total_loss
learn_iters += 1
# Gather information about the episode
for player in game.level.player_sprites:
score = np.mean(episode_reward[player.player_id])
# Update score
score_history[player.player_id][episode] = score
# Update actor/critic loss
actor_loss[player.player_id][episode] = np.mean(
episode_actor_loss)
critic_loss[player.player_id][episode] = np.mean(
episode_critic_loss)
total_loss[player.player_id][episode] = np.mean(
episode_total_loss)
# Check for new best score
if score > best_score[player.player_id]:
print(f"\nEpisode:\
{episode}\
\nNew best score for player {player.player_id}:\
{score}\
\nOld best score for player {player.player_id}: \
{best_score[player.player_id]}")
best_score[player.player_id] = score
print(f"Saving models for player {player.player_id}...")
# Save models
player.agent.save_models(
f"A{player.player_id}",
f"C{player.player_id}")
print(f"Models saved to {chkpt_path}")
plt.figure()
plt.title("Agent Rewards")
plt.xlabel("Episode")
plt.ylabel("Score")
plt.legend([f"Agent {num}" for num in range(n_players)])
for player_score in score_history:
plt.plot(player_score)
plt.savefig(os.path.join(figure_path, 'score.png'))
plt.close()
plt.figure()
plt.suptitle("Actor Loss")
plt.xlabel("Episode")
plt.ylabel("Loss")
plt.legend([f"Agent {num}" for num in range(n_players)])
for actor in actor_loss:
plt.plot(actor)
plt.savefig(os.path.join(figure_path, 'actor_loss.png'))
plt.close()
plt.figure()
plt.suptitle("Critic Loss")
plt.xlabel("Episode")
plt.ylabel("Loss")
plt.legend([f"Agent {num}" for num in range(n_players)])
for critic in critic_loss:
plt.plot(critic)
plt.savefig(os.path.join(figure_path, 'critic_loss.png'))
plt.close()
plt.figure()
plt.suptitle("Total Loss")
plt.xlabel("Episode")
plt.ylabel("Loss")
plt.legend([f"Agent {num}" for num in range(n_players)])
for total in total_loss:
plt.plot(total)
plt.savefig(os.path.join(figure_path, 'total_loss.png'))
plt.close()
# End of training session
print("End of episodes.\
\nExiting game...")
game.quit()

utils/hyperparams.py (new file, 16 lines)
@@ -0,0 +1,16 @@
HPARAMS = {
"horizon": 2048,
"num_epochs": 15,
"batch_size": 128,
"policy_clip": 0.1,
"discount_factor": 0.99,
"GAE_lambda": 0.95,
"entropy_coeff": 0.01,
"value_coeff": 0.5,
"learning_rate": 0.0003,
}

utils/metrics.py (new file, 92 lines)
@@ -0,0 +1,92 @@
import os
import numpy as np
import matplotlib.pyplot as plt
def generate(parsed_args):
# Setup parameter monitoring
score_history = np.zeros(
shape=(parsed_args.n_agents, parsed_args.n_episodes))
best_score = np.zeros(parsed_args.n_agents)
actor_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
critic_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
total_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
entropy = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
advantage = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
# Parentheses keep the multi-line tuple in a single return statement
return (score_history, best_score, actor_loss,
critic_loss, total_loss, entropy, advantage)
def plot_learning_curve(scores, num_players, figure_path):
plt.figure()
plt.title("Running Average - Score")
plt.xlabel("Episode")
plt.ylabel("Score")
for score in scores:
running_avg = np.zeros(len(score))
for i in range(len(score)):
running_avg[i] = np.mean(score[max(0, i-100):(i+1)])
plt.plot(running_avg)
# Add the legend after plotting so the labels attach to the plotted lines
plt.legend([f"Agent {num}" for num in range(num_players)])
plt.savefig(os.path.join(figure_path, "avg_score.png"))
plt.close()
def plot_score(scores, num_players, figure_path):
plt.figure()
plt.title("Agent Rewards - No Averaging")
plt.xlabel("Episode")
plt.ylabel("Score")
for player_score in scores:
plt.plot(player_score)
plt.legend([f"Agent {num}" for num in range(num_players)])
plt.savefig(os.path.join(figure_path, 'score.png'))
plt.close()
def plot_loss(nn_type, losses, num_players, figure_path):
plt.figure()
plt.title(f"Running Average - {nn_type.capitalize()} Loss")
plt.xlabel("Learning Iterations")
plt.ylabel("Loss")
for loss in losses:
running_avg = np.zeros(len(loss))
for i in range(len(loss)):
running_avg[i] = np.mean(loss[max(0, i-100):(i+1)])
plt.plot(running_avg)
plt.legend([f"Agent {num}" for num in range(num_players)])
plt.savefig(os.path.join(figure_path, f"{nn_type}_loss.png"))
plt.close()
def plot_parameter(name, parameter, num_players, figure_path):
plt.figure()
plt.title(f"Running Average - {name.capitalize()}")
plt.xlabel("Learning Iterations")
plt.ylabel(f"{name.capitalize()}")
for param in parameter:
running_avg = np.zeros(len(param))
for i in range(len(param)):
running_avg[i] = np.mean(param[max(0, i-100):(i+1)])
plt.plot(running_avg)
plt.legend([f"Agent {num}" for num in range(num_players)])
plt.savefig(os.path.join(figure_path, f"{name}.png"))
plt.close()
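
A small smoke test for the plotting helpers (hypothetical values, not part of the commit; writes PNGs into the current directory):

import numpy as np
from utils.metrics import plot_learning_curve, plot_loss

scores = np.random.rand(2, 300)                      # 2 agents, 300 episodes
plot_learning_curve(scores, 2, ".")                  # writes ./avg_score.png
plot_loss("actor", np.random.rand(2, 300), 2, ".")   # writes ./actor_loss.png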

utils/seeds.py (new file, 11 lines)
@@ -0,0 +1,11 @@
import random
import torch as T
import numpy as np
def set_seeds(value):
random.seed(value)
np.random.seed(value)
T.manual_seed(value)
T.cuda.manual_seed_all(value)
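
A minimal reproducibility check (hypothetical, not part of the commit): reseeding with the same value should reproduce the same NumPy draws. Full GPU determinism may additionally require cuDNN settings, which this helper does not touch.

import numpy as np
from utils.seeds import set_seeds

set_seeds(1)
a = np.random.rand(3)
set_seeds(1)
b = np.random.rand(3)
assert np.allclose(a, b)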