Big update

Vasilis Valatsos 2024-02-10 18:11:28 +01:00
parent 8d3f4506ba
commit ff8fa7d9e7
68 changed files with 450 additions and 574 deletions

BIN  .DS_Store (vendored)
BIN  agents/.DS_Store (vendored)
BIN  agents/ppo/.DS_Store (vendored)


@@ -1 +0,0 @@
# This is a folder with all the saved models.


args.py  (new file, 91 lines)

@@ -0,0 +1,91 @@
import argparse
from utils.hyperparams import HPARAMS
def parse_args():
parser = argparse.ArgumentParser(
prog='Pneuma',
description='A Reinforcement Learning platform made with PyGame'
)
# Define seed
parser.add_argument('--no_seed',
default=False,
action="store_true",
help="Set to True to run without a seed.")
parser.add_argument('--seed',
type=int,
default=1,
help="The seed for the RNG.")
# Define episodes and agents
parser.add_argument('--n_episodes',
type=int,
default=300,
help="Number of episodes.")
parser.add_argument('--ep_length',
type=int,
default=5000,
help="Length of each episode.")
parser.add_argument('--n_agents',
type=int,
default=1,
help="Number of agents.")
# Define hyperparameters
parser.add_argument('--horizon',
type=int,
default=HPARAMS["horizon"],
help="The number of steps per update")
parser.add_argument('--gamma',
type=float,
default=HPARAMS["discount_factor"],
help="The discount factor for PPO")
parser.add_argument('--entropy_coeff',
type=float,
default=HPARAMS["entropy_coeff"],
help="The entropy coefficient")
parser.add_argument('--alpha',
type=float,
default=HPARAMS["learning_rate"],
help="The learning_rate for PPO")
parser.add_argument('--policy_clip',
type=float,
default=HPARAMS["policy_clip"],
help="The policy clip for PPO")
parser.add_argument('--batch_size',
type=int,
default=HPARAMS["batch_size"],
help="The size of each batch")
parser.add_argument('--n_epochs',
type=int,
default=HPARAMS["num_epochs"],
help="The number of epochs")
parser.add_argument('--gae_lambda',
type=float,
default=HPARAMS["GAE_lambda"],
help="The lambda parameter of the GAE")
# Misc
parser.add_argument('--no_training',
default=False,
action="store_true",
help="Set flag to disable learning. Useful for viewing trained agents interact in the environment.")
parser.add_argument('--show_pg',
default=False,
action="store_true",
help="Set flag to open a PyGame window on desktop")
return parser.parse_args()
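
For context, a minimal sketch of how these defaults resolve (my illustration, not part of the commit): every hyperparameter flag falls back to the shared HPARAMS table from utils/hyperparams.py, added further down in this commit, so a run only needs to name the values it overrides.

# Sketch, assuming args.py and utils/hyperparams.py are importable from the repo root.
from utils.hyperparams import HPARAMS
from args import parse_args

opts = parse_args()  # e.g. launched as: python main.py --gamma 0.95 --n_episodes 100
print(opts.horizon == HPARAMS["horizon"])  # True unless --horizon was passed explicitly
print(opts.gamma)                          # 0.95 here; HPARAMS["discount_factor"] by default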

BIN  chkpts/run2/A0 (new file)
BIN  chkpts/run2/C0 (new file)
BIN  configs/.DS_Store (vendored)

@@ -1,7 +1,7 @@
 import pygame
 from random import randint
-from configs.system.window_config import TILESIZE
+from config.system.window import TILESIZE
 class MagicPlayer:

@@ -1,8 +1,8 @@
 import pygame
 from random import randint, choice
-from configs.game.spell_config import magic_data
-from configs.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data
 from .movement import MovementHandler
 from .combat import CombatHandler

@@ -4,7 +4,7 @@ from math import sin
 from utils.resource_loader import import_folder, import_assets
-from configs.system.window_config import HITBOX_OFFSET
+from config.system.window import HITBOX_OFFSET
 class AnimationHandler:

@@ -1,8 +1,8 @@
 from effects.weapon_effects import Weapon
 from effects.magic_effects import MagicPlayer
-from configs.game.weapon_config import weapon_data
-from configs.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data
 class CombatHandler:

@@ -1,5 +1,5 @@
-from configs.game.player_config import warrior_stats, mage_stats, tank_stats
-from configs.game.monster_config import monster_data
+from config.game.player_config import warrior_stats, mage_stats, tank_stats
+from config.game.monster_config import monster_data
 class StatsHandler:

@@ -1,6 +1,6 @@
 import pygame
-from .components.animaton import AnimationHandler
+from .components.animation import AnimationHandler
 from .components.stats import StatsHandler
 from .components._input import InputHandler

@@ -2,16 +2,16 @@ import pygame
 import numpy as np
 from random import randint
-from configs.game.weapon_config import weapon_data
-from configs.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data
 from .components.stats import StatsHandler
 from .components._input import InputHandler
-from .components.animaton import AnimationHandler
+from .components.animation import AnimationHandler
 from effects.particle_effects import AnimationPlayer
-from agents.ppo.agent import Agent
+from ml.ppo.agent import Agent
 class Player(pygame.sprite.Sprite):

(deleted file, 257 lines)

@@ -1,257 +0,0 @@
import pygame
import numpy as np
from random import randint
from configs.game.weapon_config import weapon_data
from configs.game.spell_config import magic_data
from .components.stats import StatsHandler
from .components._input import InputHandler
from .components.animaton import AnimationHandler
from effects.particle_effects import AnimationPlayer
from agents.ppo.agent import Agent
class Player(pygame.sprite.Sprite):
def __init__(self,
player_id,
role,
position,
map_edge,
groups,
obstacle_sprites,
visible_sprites,
attack_sprites,
attackable_sprites
):
super().__init__(groups)
self.initial_position = position
self.map_edge = map_edge
self.player_id = player_id
self.distance_direction_from_enemy = None
# Sprite Setup
self.sprite_type = "player"
self.obstacle_sprites = obstacle_sprites
self.visible_sprites = visible_sprites
self.attack_sprites = attack_sprites
self.attackable_sprites = attackable_sprites
# Graphics Setup
self.animation_player = AnimationPlayer()
self.animation = AnimationHandler(self.sprite_type)
self.animation.import_assets(position)
# Input Setup
self._input = InputHandler(
self.sprite_type, self.animation_player)
# Setup Stats
self.role = role
self.stats = StatsHandler(self.sprite_type, self.role)
def setup_agent(self,
gamma,
alpha,
policy_clip,
batch_size,
n_epochs,
gae_lambda,
chkpt_dir,
entropy_coef,
no_load=False):
self.max_num_enemies = len(self.distance_direction_from_enemy)
self.get_current_state()
self.num_features = len(self.state_features)
self.agent = Agent(
input_dims=self.num_features,
n_actions=len(self._input.possible_actions),
gamma=gamma,
alpha=alpha,
policy_clip=policy_clip,
batch_size=batch_size,
n_epochs=n_epochs,
gae_lambda=gae_lambda,
entropy_coef=entropy_coef,
chkpt_dir=chkpt_dir
)
print(
f"\nAgent initialized on player {self.player_id} using {self.agent.actor.device}.")
if not no_load:
print("Attempting to load models ...")
try:
self.agent.load_models(
actr_chkpt=f"A{self.player_id}",
crtc_chkpt=f"C{self.player_id}"
)
print("Models loaded ...\n")
except FileNotFoundError:
print(
f"FileNotFound for player {self.player_id}.\
\nSkipping loading ...\n")
def get_status(self):
if self._input.movement.direction.x == 0\
and self._input.movement.direction.y == 0:
if 'idle' not in self._input.status and 'attack' not in self._input.status:
self._input.status += '_idle'
if self._input.attacking:
self._input.movement.direction.x = 0
self._input.movement.direction.y = 0
if 'attack' not in self._input.status:
if 'idle' in self._input.status:
self._input.status = self._input.status.replace(
'idle', 'attack')
else:
self._input.status += '_attack'
else:
if 'attack' in self._input.status:
self._input.status = self._input.status.replace('_attack', '')
def attack_logic(self):
if self.attack_sprites:
for attack_sprite in self.attack_sprites:
collision_sprites = pygame.sprite.spritecollide(
attack_sprite, self.attackable_sprites, False)
if collision_sprites:
for target_sprite in collision_sprites:
if target_sprite.sprite_type == 'grass':
pos = target_sprite.rect.center
offset = pygame.math.Vector2(0, 75)
for leaf in range(randint(3, 6)):
self.animation_player.create_grass_particles(
position=pos - offset,
groups=[self.visible_sprites])
target_sprite.kill()
else:
target_sprite.get_damaged(
self, attack_sprite.sprite_type)
def get_full_weapon_damage(self):
base_damage = self.stats.attack
weapon_damage = weapon_data[self._input.combat.weapon]['damage']
return (base_damage + weapon_damage)
def get_full_magic_damage(self):
base_damage = self.stats.magic
spell_damage = magic_data[self._input.combat.magic]['strength']
return (base_damage + spell_damage)
def get_reward(self):
self.reward = 0
# Base reward on player exp
self.reward += self.stats.exp
print(f'Player exp added to reward: {self.stats.exp} -> {self.reward}')
# Add relative hp of player
self.reward += self.stats.health/self.stats.stats['health']
print(f"Player hp added to reward: {self.stats.health/self.stats.stats['health']} -> {self.reward}")
# Take into account distance of nearest enemy from player relative to the map length
self.reward -= self.nearest_dist/np.sqrt(np.sum(self.map_edge))
print(f'Relative distance of enemy: {self.nearest_dist/np.sqrt(np.sum(self.map_edge))} -> {self.reward}')
# Take into account nearest enemy relative health
self.reward -= self.nearest_enemy.stats.health/self.nearest_enemy.stats.monster_info['health']
print(f"Enemy hp added: {self.nearest_enemy.stats.health/self.nearest_enemy.stats.monster_info['health']} -> {self.reward}")
def get_current_state(self):
if self.distance_direction_from_enemy != []:
sorted_distances = sorted(
self.distance_direction_from_enemy, key=lambda x: x[0])
else:
sorted_distances = np.zeros(self.num_features)
self.nearest_dist, _, self.nearest_enemy = sorted_distances[0]
self.action_features = [self._input.action]
self.get_reward()
self.state_features = [
self.animation.rect.center[0]/self.map_edge[0],
self.animation.rect.center[1]/self.map_edge[1],
self._input.movement.direction.x,
self._input.movement.direction.y,
self.stats.health/self.stats.stats['health'],
self.stats.energy/self.stats.stats['energy'],
1 if 'attack' in self._input.status else 0,
]
for distance, direction, enemy in self.distance_direction_from_enemy:
self.state_features.extend([
distance/np.sqrt(np.sum(self.map_edge)),
direction[0],
direction[1],
enemy.stats.health /
enemy.stats.monster_info['health'],
enemy.stats.exp,
])
if hasattr(self, 'num_features'):
while len(self.state_features) < self.num_features:
self.state_features.append(0)
self.state_features = np.array(self.state_features)
def is_dead(self):
if self.stats.health <= 0:
self.stats.health = 0
self.animation.import_assets((3264, 448))
return True
else:
return False
def agent_update(self):
# Get the current state
self.get_current_state()
# Choose action based on current state
action, probs, value\
= self.agent.choose_action(self.state_features)
# Apply chosen action
self._input.check_input(action,
self.stats.speed,
self.animation.hitbox,
self.obstacle_sprites,
self.animation.rect,
self)
self.agent.remember(self.state_features, action,
probs, value, self.reward, self.is_dead())
self.get_current_state()
def update(self):
self.agent_update()
# Cooldowns and Regen
self.stats.health_recovery()
self.stats.energy_recovery()
# Refresh player based on input and animate
self.get_status()
self.animation.animate(
self._input.status, self._input.combat.vulnerable)
self._input.cooldowns(self._input.combat.vulnerable)

@@ -1,6 +1,6 @@
 import pygame
-from configs.system.window_config import TILESIZE,\
+from config.system.window import TILESIZE,\
     HITBOX_OFFSET

BIN  6 image files deleted (names not shown)

BIN  figures/run1/actor_loss.png (new file)
BIN  figures/run1/advantage.png (new file)
BIN  figures/run1/avg_score.png (new file)
BIN  new image file (name not shown)
BIN  figures/run1/entropy.png (new file)
BIN  figures/run3/actor_loss.png (new file)
BIN  figures/run3/advantage.png (new file)
BIN  figures/run3/avg_score.png (new file)
BIN  new image file (name not shown)
BIN  figures/run3/entropy.png (new file)

BIN  6 further image files deleted (names not shown)

folder_struct.py  (new file, 30 lines)

@@ -0,0 +1,30 @@
import os
def set_directories(base_path):
if not os.path.exists(base_path):
os.makedirs(base_path)
trial_dirs = [directory for directory in os.listdir(
base_path) if os.path.isdir(os.path.join(base_path, directory))]
trial_nums = sorted([int(directory[-1])
for directory in trial_dirs if directory.startswith("run") and directory[-1].isdigit()])
next_trial_num = trial_nums[-1] + 1 if trial_nums else 1
new_trial_path = os.path.join(base_path, f"run{next_trial_num}")
os.makedirs(new_trial_path)
return new_trial_path
def setup_dirs():
home_folder = os.path.dirname(os.path.abspath(__file__))
chkpt_path = os.path.join(home_folder, 'chkpts')
chkpt_path = set_directories(chkpt_path)
figure_path = os.path.join(home_folder, 'figures')
figure_path = set_directories(figure_path)
return chkpt_path, figure_path
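
A brief usage sketch (my addition, not in the commit): setup_dirs() is intended to be called once per training run and hands back a fresh, sequentially numbered pair of output directories, as main.py does below.

from folder_struct import setup_dirs

chkpt_path, figure_path = setup_dirs()
# The first call creates e.g. <repo>/chkpts/run1 and <repo>/figures/run1; the next
# run gets run2, and so on. Run numbers are read from the last character of the
# existing directory names, so the scheme assumes single-digit run counts.
print(chkpt_path, figure_path)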

game.py  (13 changed lines)

@@ -1,16 +1,19 @@
-from configs.system.window_config import WIDTH,\
+import os
+os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
+from config.system.window import WIDTH,\
     HEIGHT,\
     WATER_COLOR,\
     FPS
 from level import Level
 import pygame
 import sys
-import os
-os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
-class Game:
+class Pneuma:
     def __init__(self, show_pg=False, n_players=1,):
         print(f"Initializing Pneuma with {n_players} player(s).\

@@ -1,7 +1,7 @@
 import pygame
-from configs.game.weapon_config import weapon_data
-from configs.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data
 from .ui_settings import UI_FONT,\
     UI_FONT_SIZE,\

@@ -4,14 +4,13 @@ import numpy as np
 from random import choice
-from configs.system.window_config import TILESIZE
+from config.system.window import TILESIZE
 from utils.debug import debug
 from utils.resource_loader import import_csv_layout, import_folder
 from interface.ui import UI
-from entities.observer import Observer
 from entities.player import Player
 from entities.enemy import Enemy
 from entities.terrain import Terrain

main.py  (new file, 185 lines)

@@ -0,0 +1,185 @@
import os
import random
import torch as T
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import args
import folder_struct
import utils.seeds as seeds
import utils.metrics as metrics
from game import Pneuma
def main():
parsed_args = args.parse_args()
if not parsed_args.no_seed:
seeds.set_seeds(parsed_args.seed)
print(f"Seed set as {parsed_args.seed}")
else:
print("No seed set")
chkpt_path, figure_path = folder_struct.setup_dirs()
n_episodes = parsed_args.n_episodes
episode_length = parsed_args.ep_length
n_agents = parsed_args.n_agents
horizon = parsed_args.horizon
no_training = parsed_args.no_training
learnings_per_episode = int(episode_length/horizon)
learn_iters = 0
show_pygame = parsed_args.show_pg
# Setup AI metrics
# Setup parameter monitoring
score_history = np.zeros(
shape=(parsed_args.n_agents, parsed_args.n_episodes))
best_score = np.zeros(parsed_args.n_agents)
actor_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
critic_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
total_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
entropy = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
advantage = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
# score_history, best_score, actor_loss, critic_loss, total_loss, entropy, advantage = metrics.generate(parsed_args)
game = Pneuma(show_pg=show_pygame, n_players=parsed_args.n_agents)
print("Initializing agents ...")
for player in tqdm(game.level.player_sprites,
dynamic_ncols=True):
player.setup_agent(
gamma=parsed_args.gamma,
alpha=parsed_args.alpha,
policy_clip=parsed_args.policy_clip,
batch_size=parsed_args.batch_size,
n_epochs=parsed_args.n_epochs,
gae_lambda=parsed_args.gae_lambda,
entropy_coef=parsed_args.entropy_coeff,
chkpt_dir=chkpt_path,
no_load=True
)
# Episodes start
for episode in tqdm(range(n_episodes),
dynamic_ncols=True):
game.level.reset()
episode_reward = np.zeros(
shape=(n_agents, episode_length))
episode_actor_loss = np.zeros(
shape=(n_agents, learnings_per_episode))
episode_critic_loss = np.zeros(
shape=(n_agents, learnings_per_episode))
episode_total_loss = np.zeros(
shape=(n_agents, learnings_per_episode))
# Main game loop
for step in tqdm(range(episode_length),
leave=False,
ascii=True,
dynamic_ncols=True):
if not game.level.done:
game.run()
for player in game.level.player_sprites:
episode_reward[player.player_id][step] = player.reward
if not no_training and ((step % horizon == 0 and step != 0) or player.is_dead()):
player.agent.learn()
episode_actor_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.actor_loss
episode_critic_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.critic_loss
episode_total_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.total_loss
learn_iters += 1
# Gather information about the episode
for player in game.level.player_sprites:
score = np.mean(episode_reward[player.player_id])
# Update score
score_history[player.player_id][episode] = score
# Update actor/critic loss
actor_loss[player.player_id][episode] = np.mean(
episode_actor_loss)
critic_loss[player.player_id][episode] = np.mean(
episode_critic_loss)
total_loss[player.player_id][episode] = np.mean(
episode_total_loss)
# Check for new best score
if score > best_score[player.player_id]:
print(f"\nEpisode:\
{episode}\
\nNew best score for player {player.player_id}:\
{score}\
\nOld best score for player {player.player_id}: \
{best_score[player.player_id]}")
best_score[player.player_id] = score
print(f"Saving models for player {player.player_id}...")
# Save models
player.agent.save_models(
f"A{player.player_id}",
f"C{player.player_id}")
print(f"Models saved to {chkpt_path}")
metrics.plot_learning_curve(score_history, parsed_args.n_agents, figure_path)
metrics.plot_loss('actor', actor_loss, parsed_args.n_agents, figure_path)
metrics.plot_loss('critic', critic_loss, parsed_args.n_agents, figure_path)
metrics.plot_parameter('entropy', entropy, parsed_args.n_agents, figure_path)
metrics.plot_parameter('advantage', advantage, parsed_args.n_agents, figure_path)
# End of training session
print("End of episodes.\
\nExiting game...")
game.quit()
if __name__ == '__main__':
main()
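
One numeric detail worth spelling out (derived from the defaults above, not stated anywhere in the commit): the per-episode loss buffers are sized by learnings_per_episode, which with the default episode length and horizon evaluates to 2.

# Worked example using the defaults from args.py / utils/hyperparams.py.
episode_length, horizon = 5000, 2048
learnings_per_episode = int(episode_length / horizon)  # == 2
# agent.learn() fires at steps 2048 and 4096 (and on any step where a player is dead),
# so the index learn_iters % learnings_per_episode cycles through 0 and 1.
print(learnings_per_episode)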

pneuma.py  (deleted file, 293 lines)

@@ -1,293 +0,0 @@
import os
import random
import argparse
import torch as T
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from game import Game
if __name__ == "__main__":
# Create parser
parser = argparse.ArgumentParser(
prog='Pneuma',
description='A Reinforcement Learning platform made with PyGame'
)
# Add args
parser.add_argument('--no_seed',
default=False,
action="store_true",
help="Set to True to run without a seed.")
parser.add_argument('--seed',
type=int,
default=1,
help="The seed for the RNG.")
parser.add_argument('--n_episodes',
type=int,
default=300,
help="Number of episodes.")
parser.add_argument('--ep_length',
type=int,
default=5000,
help="Length of each episode.")
parser.add_argument('--n_players',
type=int,
default=1,
help="Number of players.")
parser.add_argument('--chkpt_path',
type=str,
default="agents/saved_models",
help="Save/load location for agent models.")
parser.add_argument('--figure_path',
type=str,
default="figures",
help="Save location for figures.")
parser.add_argument('--horizon',
type=int,
default=2048,
help="The number of steps per update")
parser.add_argument('--show_pg',
default=False,
action="store_true",
help="Set to True to open PyGame window on desktop")
parser.add_argument('--no_load',
default=False,
action="store_true",
help="Set to True to ignore saved models")
parser.add_argument('--gamma',
type=float,
default=0.99,
help="The gamma parameter for PPO")
parser.add_argument('--entropy',
type=float,
default=0.01,
help="The entropy coefficient")
parser.add_argument('--alpha',
type=float,
default=0.0003,
help="The alpha parameter for PPO")
parser.add_argument('--policy_clip',
type=float,
default=0.1,
help="The policy clip")
parser.add_argument('--batch_size',
type=int,
default=128,
help="The size of each batch")
parser.add_argument('--n_epochs',
type=int,
default=20,
help="The number of epochs")
parser.add_argument('--gae_lambda',
type=float,
default=0.95,
help="The lambda parameter of the GAE")
parser.add_argument('--no_training',
default=False,
action="store_true",
help="Decides if the algorithm should train.")
args = parser.parse_args()
random.seed(args.seed)
np.random.seed(args.seed)
T.manual_seed(args.seed)
n_episodes = args.n_episodes
episode_length = args.ep_length
n_players = args.n_players
home_folder = os.path.dirname(os.path.abspath(__file__))
chkpt_path = os.path.join(home_folder, args.chkpt_path)
figure_path = os.path.join(home_folder, args.figure_path)
horizon = args.horizon
no_training = args.no_training
learnings_per_episode = int(episode_length/horizon)
learn_iters = 0
show_pygame = args.show_pg
# Setup AI stuff
score_history = np.zeros(shape=(n_players, n_episodes))
best_score = np.zeros(n_players)
actor_loss = np.zeros(shape=(n_players,
n_episodes))
critic_loss = np.zeros(shape=(n_players,
n_episodes))
total_loss = np.zeros(shape=(n_players,
n_episodes))
game = Game(show_pg=show_pygame, n_players=n_players)
print("Initializing agents ...")
for player in tqdm(game.level.player_sprites,
dynamic_ncols=True):
player.setup_agent(
gamma=args.gamma,
alpha=args.alpha,
policy_clip=args.policy_clip,
batch_size=args.batch_size,
n_epochs=args.n_epochs,
gae_lambda=args.gae_lambda,
entropy_coef=args.entropy,
chkpt_dir=chkpt_path,
no_load=args.no_load
)
# Episodes start
for episode in tqdm(range(n_episodes),
dynamic_ncols=True):
game.level.reset()
episode_reward = np.zeros(
shape=(n_players, episode_length))
episode_actor_loss = np.zeros(
shape=(n_players, learnings_per_episode))
episode_critic_loss = np.zeros(
shape=(n_players, learnings_per_episode))
episode_total_loss = np.zeros(
shape=(n_players, learnings_per_episode))
# Main game loop
for step in tqdm(range(episode_length),
leave=False,
ascii=True,
dynamic_ncols=True):
if not game.level.done:
game.run()
for player in game.level.player_sprites:
episode_reward[player.player_id][step] = player.reward
if not no_training and ((step % horizon == 0 and step != 0) or player.is_dead()):
player.agent.learn()
episode_actor_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.actor_loss
episode_critic_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.critic_loss
episode_total_loss[player.player_id][learn_iters % learnings_per_episode]\
= player.agent.total_loss
learn_iters += 1
# Gather information about the episode
for player in game.level.player_sprites:
score = np.mean(episode_reward[player.player_id])
# Update score
score_history[player.player_id][episode] = score
# Update actor/critic loss
actor_loss[player.player_id][episode] = np.mean(
episode_actor_loss)
critic_loss[player.player_id][episode] = np.mean(
episode_critic_loss)
total_loss[player.player_id][episode] = np.mean(
episode_total_loss)
# Check for new best score
if score > best_score[player.player_id]:
print(f"\nEpisode:\
{episode}\
\nNew best score for player {player.player_id}:\
{score}\
\nOld best score for player {player.player_id}: \
{best_score[player.player_id]}")
best_score[player.player_id] = score
print(f"Saving models for player {player.player_id}...")
# Save models
player.agent.save_models(
f"A{player.player_id}",
f"C{player.player_id}")
print(f"Models saved to {chkpt_path}")
plt.figure()
plt.title("Agent Rewards")
plt.xlabel("Episode")
plt.ylabel("Score")
plt.legend([f"Agent {num}" for num in range(n_players)])
for player_score in score_history:
plt.plot(player_score)
plt.savefig(os.path.join(figure_path, 'score.png'))
plt.close()
plt.figure()
plt.suptitle("Actor Loss")
plt.xlabel("Episode")
plt.ylabel("Loss")
plt.legend([f"Agent {num}" for num in range(n_players)])
for actor in actor_loss:
plt.plot(actor)
plt.savefig(os.path.join(figure_path, 'actor_loss.png'))
plt.close()
plt.figure()
plt.suptitle("Critic Loss")
plt.xlabel("Episode")
plt.ylabel("Loss")
plt.legend([f"Agent {num}" for num in range(n_players)])
for critic in critic_loss:
plt.plot(critic)
plt.savefig(os.path.join(figure_path, 'critic_loss.png'))
plt.close()
plt.figure()
plt.suptitle("Total Loss")
plt.xlabel("Episode")
plt.ylabel("Loss")
plt.legend([f"Agent {num}" for num in range(n_players)])
for total in total_loss:
plt.plot(total)
plt.savefig(os.path.join(figure_path, 'total_loss.png'))
plt.close()
# End of training session
print("End of episodes.\
\nExiting game...")
game.quit()

utils/hyperparams.py  (new file, 16 lines)

@@ -0,0 +1,16 @@
HPARAMS = {
"horizon": 2048,
"num_epochs": 15,
"batch_size": 128,
"policy_clip": 0.1,
"discount_factor": 0.99,
"GAE_lambda": 0.95,
"entropy_coeff": 0.01,
"value_coeff": 0.5,
"learning_rate": 0.0003,
}
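
For cross-reference, how these keys map onto the CLI flags defined in args.py earlier in this commit; the mapping is my reading of the two files, not something stated in the commit, and value_coeff is the one entry with no flag.

# Key in HPARAMS -> flag in args.py
FLAG_FOR_HPARAM = {
    "horizon": "--horizon",
    "discount_factor": "--gamma",
    "entropy_coeff": "--entropy_coeff",
    "learning_rate": "--alpha",
    "policy_clip": "--policy_clip",
    "batch_size": "--batch_size",
    "num_epochs": "--n_epochs",
    "GAE_lambda": "--gae_lambda",
    # "value_coeff" has no CLI flag; presumably the PPO agent reads it directly.
}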

utils/metrics.py  (new file, 92 lines)

@@ -0,0 +1,92 @@
import os
import numpy as np
import matplotlib.pyplot as plt
def generate(parsed_args):
# Setup parameter monitoring
score_history = np.zeros(
shape=(parsed_args.n_agents, parsed_args.n_episodes))
best_score = np.zeros(parsed_args.n_agents)
actor_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
critic_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
total_loss = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
entropy = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
advantage = np.zeros(shape=(parsed_args.n_agents,
parsed_args.n_episodes))
return (score_history, best_score, actor_loss,
        critic_loss, total_loss, entropy,
        advantage)
def plot_learning_curve(scores, num_players, figure_path):
plt.figure()
plt.title("Running Average - Score")
plt.xlabel("Episode")
plt.ylabel("Score")
plt.legend([f"Agent {num}" for num in range(num_players)])
for score in scores:
running_avg = np.zeros(len(score))
for i in range(len(score)):
running_avg[i] = np.mean(score[max(0, i-100):(i+1)])
plt.plot(running_avg)
plt.savefig(os.path.join(figure_path, "avg_score.png"))
plt.close()
def plot_score(scores, num_players, figure_path):
plt.figure()
plt.title("Agent Rewards - No Averaging")
plt.xlabel("Episode")
plt.ylabel("Score")
plt.legend([f"Agent {num}" for num in range(num_players)])
for player_score in scores:
plt.plot(player_score)
plt.savefig(os.path.join(figure_path, 'score.png'))
plt.close()
def plot_loss(nn_type, losses, num_players, figure_path):
plt.figure()
plt.title(f"Running Average - {nn_type.capitalize()} Loss")
plt.xlabel("Learning Iterations")
plt.ylabel("Loss")
plt.legend([f"Agent {num}" for num in range(num_players)])
for loss in losses:
running_avg = np.zeros(len(loss))
for i in range(len(loss)):
running_avg[i] = np.mean(loss[max(0, i-100):(i+1)])
plt.plot(running_avg)
plt.savefig(os.path.join(figure_path, f"{nn_type}_loss.png"))
plt.close()
def plot_parameter(name, parameter, num_players, figure_path):
plt.figure()
plt.title(f"Running Average - {name.capitalize()}")
plt.xlabel("Learning Iterations")
plt.ylabel(f"{name.capitalize()}")
plt.legend([f"Agent {num}" for num in range(num_players)])
for param in parameter:
running_avg = np.zeros(len(param))
for i in range(len(param)):
running_avg[i] = np.mean(param[max(0, i-100):(i+1)])
plt.plot(running_avg)
plt.savefig(os.path.join(figure_path, f"{name}.png"))
plt.close()

utils/seeds.py  (new file, 11 lines)

@@ -0,0 +1,11 @@
import random
import torch as T
import numpy as np
def set_seeds(value):
random.seed(value)
np.random.seed(value)
T.manual_seed(value)
T.cuda.manual_seed_all(value)
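
A small usage sketch (my addition, not part of the commit): main.py calls set_seeds once at startup; torch.cuda.manual_seed_all should be a no-op on CPU-only machines, so the helper is safe to call unconditionally.

import utils.seeds as seeds

seeds.set_seeds(1)  # mirrors the default --seed in args.py
# For stricter reproducibility one could additionally set, for example:
#   torch.backends.cudnn.deterministic = True
#   torch.backends.cudnn.benchmark = False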