Big update: split the monolithic pneuma.py entry point into args.py, main.py, folder_struct.py and new utils modules (hyperparams, metrics, seeds); rename the configs package to config and the agents package to ml; fix the animaton -> animation module typo; rename the Game class to Pneuma; drop stray .DS_Store files and refresh the saved checkpoints and run figures.
BIN  .DS_Store  vendored
BIN  agents/.DS_Store  vendored
BIN  agents/ppo/.DS_Store  vendored
@@ -1 +0,0 @@
-# This is a folder with all the saved models.
91  args.py  Normal file

@@ -0,0 +1,91 @@
import argparse

from utils.hyperparams import HPARAMS


def parse_args():

    parser = argparse.ArgumentParser(
        prog='Pneuma',
        description='A Reinforcement Learning platform made with PyGame'
    )

    # Define seed
    parser.add_argument('--no_seed',
                        default=False,
                        action="store_true",
                        help="Set to True to run without a seed.")

    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help="The seed for the RNG.")

    # Define episodes and agents
    parser.add_argument('--n_episodes',
                        type=int,
                        default=300,
                        help="Number of episodes.")

    parser.add_argument('--ep_length',
                        type=int,
                        default=5000,
                        help="Length of each episode.")

    parser.add_argument('--n_agents',
                        type=int,
                        default=1,
                        help="Number of agents.")

    # Define hyperparameters
    parser.add_argument('--horizon',
                        type=int,
                        default=HPARAMS["horizon"],
                        help="The number of steps per update")

    parser.add_argument('--gamma',
                        type=float,
                        default=HPARAMS["discount_factor"],
                        help="The discount factor for PPO")

    parser.add_argument('--entropy_coeff',
                        type=float,
                        default=HPARAMS["entropy_coeff"],
                        help="The entropy coefficient")

    parser.add_argument('--alpha',
                        type=float,
                        default=HPARAMS["learning_rate"],
                        help="The learning_rate for PPO")

    parser.add_argument('--policy_clip',
                        type=float,
                        default=HPARAMS["policy_clip"],
                        help="The policy clip for PPO")

    parser.add_argument('--batch_size',
                        type=int,
                        default=HPARAMS["batch_size"],
                        help="The size of each batch")

    parser.add_argument('--n_epochs',
                        type=int,
                        default=HPARAMS["num_epochs"],
                        help="The number of epochs")

    parser.add_argument('--gae_lambda',
                        type=float,
                        default=HPARAMS["GAE_lambda"],
                        help="The lambda parameter of the GAE")

    # Misc
    parser.add_argument('--no_training',
                        default=False,
                        action="store_true",
                        help="Set flag to disable learning. Useful for viewing trained agents interact in the environment.")

    parser.add_argument('--show_pg',
                        default=False,
                        action="store_true",
                        help="Set flag to open a PyGame window on desktop")

    return parser.parse_args()
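A usage sketch (illustration only, assuming the new main.py below is run from the repository root): these flags configure a training session, for example

    python main.py --n_agents 2 --n_episodes 100 --ep_length 2000 --show_pg

Any PPO flag left unspecified falls back to the HPARAMS defaults imported from utils/hyperparams.py.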
BIN  chkpts/run2/A0  Normal file
BIN  chkpts/run2/C0  Normal file
BIN  configs/.DS_Store  vendored
@@ -1,7 +1,7 @@
 import pygame
 from random import randint

-from configs.system.window_config import TILESIZE
+from config.system.window import TILESIZE


 class MagicPlayer:
@@ -1,8 +1,8 @@
 import pygame
 from random import randint, choice

-from configs.game.spell_config import magic_data
-from configs.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data

 from .movement import MovementHandler
 from .combat import CombatHandler
@@ -4,7 +4,7 @@ from math import sin

 from utils.resource_loader import import_folder, import_assets

-from configs.system.window_config import HITBOX_OFFSET
+from config.system.window import HITBOX_OFFSET


 class AnimationHandler:
@@ -1,8 +1,8 @@
 from effects.weapon_effects import Weapon
 from effects.magic_effects import MagicPlayer

-from configs.game.weapon_config import weapon_data
-from configs.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data


 class CombatHandler:
@@ -1,5 +1,5 @@
-from configs.game.player_config import warrior_stats, mage_stats, tank_stats
-from configs.game.monster_config import monster_data
+from config.game.player_config import warrior_stats, mage_stats, tank_stats
+from config.game.monster_config import monster_data


 class StatsHandler:
@@ -1,6 +1,6 @@
 import pygame

-from .components.animaton import AnimationHandler
+from .components.animation import AnimationHandler
 from .components.stats import StatsHandler
 from .components._input import InputHandler

@@ -2,16 +2,16 @@ import pygame
 import numpy as np
 from random import randint

-from configs.game.weapon_config import weapon_data
-from configs.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data

 from .components.stats import StatsHandler
 from .components._input import InputHandler
-from .components.animaton import AnimationHandler
+from .components.animation import AnimationHandler

 from effects.particle_effects import AnimationPlayer

-from agents.ppo.agent import Agent
+from ml.ppo.agent import Agent


 class Player(pygame.sprite.Sprite):
@@ -1,257 +0,0 @@
import pygame
import numpy as np
from random import randint

from configs.game.weapon_config import weapon_data
from configs.game.spell_config import magic_data

from .components.stats import StatsHandler
from .components._input import InputHandler
from .components.animaton import AnimationHandler

from effects.particle_effects import AnimationPlayer

from agents.ppo.agent import Agent


class Player(pygame.sprite.Sprite):
    def __init__(self,
                 player_id,
                 role,
                 position,
                 map_edge,
                 groups,
                 obstacle_sprites,
                 visible_sprites,
                 attack_sprites,
                 attackable_sprites
                 ):
        super().__init__(groups)

        self.initial_position = position
        self.map_edge = map_edge
        self.player_id = player_id
        self.distance_direction_from_enemy = None

        # Sprite Setup
        self.sprite_type = "player"
        self.obstacle_sprites = obstacle_sprites
        self.visible_sprites = visible_sprites
        self.attack_sprites = attack_sprites
        self.attackable_sprites = attackable_sprites

        # Graphics Setup
        self.animation_player = AnimationPlayer()
        self.animation = AnimationHandler(self.sprite_type)
        self.animation.import_assets(position)
        # Input Setup
        self._input = InputHandler(
            self.sprite_type, self.animation_player)

        # Setup Stats
        self.role = role
        self.stats = StatsHandler(self.sprite_type, self.role)

    def setup_agent(self,
                    gamma,
                    alpha,
                    policy_clip,
                    batch_size,
                    n_epochs,
                    gae_lambda,
                    chkpt_dir,
                    entropy_coef,
                    no_load=False):

        self.max_num_enemies = len(self.distance_direction_from_enemy)
        self.get_current_state()
        self.num_features = len(self.state_features)

        self.agent = Agent(
            input_dims=self.num_features,
            n_actions=len(self._input.possible_actions),
            gamma=gamma,
            alpha=alpha,
            policy_clip=policy_clip,
            batch_size=batch_size,
            n_epochs=n_epochs,
            gae_lambda=gae_lambda,
            entropy_coef=entropy_coef,
            chkpt_dir=chkpt_dir
        )
        print(
            f"\nAgent initialized on player {self.player_id} using {self.agent.actor.device}.")

        if not no_load:
            print("Attempting to load models ...")
            try:
                self.agent.load_models(
                    actr_chkpt=f"A{self.player_id}",
                    crtc_chkpt=f"C{self.player_id}"
                )
                print("Models loaded ...\n")

            except FileNotFoundError:
                print(
                    f"FileNotFound for player {self.player_id}.\
                    \nSkipping loading ...\n")

    def get_status(self):
        if self._input.movement.direction.x == 0\
                and self._input.movement.direction.y == 0:

            if 'idle' not in self._input.status and 'attack' not in self._input.status:
                self._input.status += '_idle'

        if self._input.attacking:
            self._input.movement.direction.x = 0
            self._input.movement.direction.y = 0
            if 'attack' not in self._input.status:
                if 'idle' in self._input.status:
                    self._input.status = self._input.status.replace(
                        'idle', 'attack')
                else:
                    self._input.status += '_attack'
        else:
            if 'attack' in self._input.status:
                self._input.status = self._input.status.replace('_attack', '')

    def attack_logic(self):
        if self.attack_sprites:
            for attack_sprite in self.attack_sprites:
                collision_sprites = pygame.sprite.spritecollide(
                    attack_sprite, self.attackable_sprites, False)
                if collision_sprites:
                    for target_sprite in collision_sprites:
                        if target_sprite.sprite_type == 'grass':
                            pos = target_sprite.rect.center
                            offset = pygame.math.Vector2(0, 75)
                            for leaf in range(randint(3, 6)):
                                self.animation_player.create_grass_particles(
                                    position=pos - offset,
                                    groups=[self.visible_sprites])

                            target_sprite.kill()
                        else:
                            target_sprite.get_damaged(
                                self, attack_sprite.sprite_type)

    def get_full_weapon_damage(self):
        base_damage = self.stats.attack
        weapon_damage = weapon_data[self._input.combat.weapon]['damage']
        return (base_damage + weapon_damage)

    def get_full_magic_damage(self):
        base_damage = self.stats.magic
        spell_damage = magic_data[self._input.combat.magic]['strength']
        return (base_damage + spell_damage)

    def get_reward(self):

        self.reward = 0

        # Base reward on player exp
        self.reward += self.stats.exp
        print(f'Player exp added to reward: {self.stats.exp} -> {self.reward}')

        # Add relative hp of player
        self.reward += self.stats.health/self.stats.stats['health']
        print(f"Player hp added to reward: {self.stats.health/self.stats.stats['health']} -> {self.reward}")

        # Take into account distance of nearest enemy from player relative to the map length
        self.reward -= self.nearest_dist/np.sqrt(np.sum(self.map_edge))
        print(f'Relative distance of enemy: {self.nearest_dist/np.sqrt(np.sum(self.map_edge))} -> {self.reward}')

        # Take into account nearest enemy relative health
        self.reward -= self.nearest_enemy.stats.health/self.nearest_enemy.stats.monster_info['health']
        print(f"Enemy hp added: {self.nearest_enemy.stats.health/self.nearest_enemy.stats.monster_info['health']} -> {self.reward}")

    def get_current_state(self):

        if self.distance_direction_from_enemy != []:
            sorted_distances = sorted(
                self.distance_direction_from_enemy, key=lambda x: x[0])
        else:
            sorted_distances = np.zeros(self.num_features)

        self.nearest_dist, _, self.nearest_enemy = sorted_distances[0]

        self.action_features = [self._input.action]

        self.get_reward()

        self.state_features = [
            self.animation.rect.center[0]/self.map_edge[0],
            self.animation.rect.center[1]/self.map_edge[1],
            self._input.movement.direction.x,
            self._input.movement.direction.y,
            self.stats.health/self.stats.stats['health'],
            self.stats.energy/self.stats.stats['energy'],
            1 if 'attack' in self._input.status else 0,
        ]

        for distance, direction, enemy in self.distance_direction_from_enemy:
            self.state_features.extend([
                distance/np.sqrt(np.sum(self.map_edge)),
                direction[0],
                direction[1],
                enemy.stats.health /
                enemy.stats.monster_info['health'],
                enemy.stats.exp,
            ])

        if hasattr(self, 'num_features'):
            while len(self.state_features) < self.num_features:
                self.state_features.append(0)

        self.state_features = np.array(self.state_features)

    def is_dead(self):
        if self.stats.health <= 0:
            self.stats.health = 0
            self.animation.import_assets((3264, 448))
            return True
        else:
            return False

    def agent_update(self):

        # Get the current state
        self.get_current_state()

        # Choose action based on current state
        action, probs, value\
            = self.agent.choose_action(self.state_features)

        # Apply chosen action
        self._input.check_input(action,
                                self.stats.speed,
                                self.animation.hitbox,
                                self.obstacle_sprites,
                                self.animation.rect,
                                self)

        self.agent.remember(self.state_features, action,
                            probs, value, self.reward, self.is_dead())

        self.get_current_state()

    def update(self):

        self.agent_update()

        # Cooldowns and Regen
        self.stats.health_recovery()
        self.stats.energy_recovery()

        # Refresh player based on input and animate
        self.get_status()
        self.animation.animate(
            self._input.status, self._input.combat.vulnerable)
        self._input.cooldowns(self._input.combat.vulnerable)
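For reference, the state vector that this (now deleted) Player assembled in get_current_state is seven player-centric features (x and y position normalized by the map edge, movement direction x and y, health and energy as fractions of their maxima, and a 0/1 attack flag), followed by five features per enemy (scaled distance, direction x and y, relative health, and exp), zero-padded up to num_features so the network input size stays fixed as enemies die. Judging by the import-only hunk earlier in this diff, the retained player module keeps the same logic.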
@@ -1,6 +1,6 @@
 import pygame

-from configs.system.window_config import TILESIZE,\
+from config.system.window import TILESIZE,\
     HITBOX_OFFSET

(6 binary figure images removed: 22 KiB, 46 KiB, 56 KiB, 26 KiB, 36 KiB, 31 KiB; filenames not shown)
BIN  figures/run1/actor_loss.png  Normal file  (20 KiB)
BIN  figures/run1/advantage.png  Normal file  (18 KiB)
BIN  figures/run1/avg_score.png  Normal file  (19 KiB)
BIN  figures/run1/critic_loss.png  Normal file  (18 KiB)
BIN  figures/run1/entropy.png  Normal file  (17 KiB)
BIN  figures/run3/actor_loss.png  Normal file  (21 KiB)
BIN  figures/run3/advantage.png  Normal file  (18 KiB)
BIN  figures/run3/avg_score.png  Normal file  (21 KiB)
BIN  figures/run3/critic_loss.png  Normal file  (26 KiB)
BIN  figures/run3/entropy.png  Normal file  (17 KiB)
(6 binary figure images removed: 21 KiB, 51 KiB, 49 KiB, 26 KiB, 36 KiB, 30 KiB; filenames not shown)
30  folder_struct.py  Normal file

@@ -0,0 +1,30 @@
import os


def set_directories(base_path):

    if not os.path.exists(base_path):
        os.makedirs(base_path)

    trial_dirs = [directory for directory in os.listdir(
        base_path) if os.path.isdir(os.path.join(base_path, directory))]
    trial_nums = sorted([int(directory[-1])
                         for directory in trial_dirs if directory.startswith("run") and directory[-1].isdigit()])
    next_trial_num = trial_nums[-1] + 1 if trial_nums else 1
    new_trial_path = os.path.join(base_path, f"run{next_trial_num}")

    os.makedirs(new_trial_path)
    return new_trial_path


def setup_dirs():

    home_folder = os.path.dirname(os.path.abspath(__file__))

    chkpt_path = os.path.join(home_folder, 'chkpts')
    chkpt_path = set_directories(chkpt_path)

    figure_path = os.path.join(home_folder, 'figures')
    figure_path = set_directories(figure_path)

    return chkpt_path, figure_path
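Layout sketch (run numbers depend on how many runs already exist): setup_dirs() creates and returns a fresh chkpts/runN and figures/runN pair under the repository root, so after a second run the tree would contain

    chkpts/run1   chkpts/run2
    figures/run1  figures/run2

which is consistent with the chkpts/run2 checkpoints and the figures/run1 and figures/run3 images added elsewhere in this diff.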
13  game.py

@@ -1,16 +1,19 @@
-from configs.system.window_config import WIDTH,\
+import os
+
+os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
+
+from config.system.window import WIDTH,\
     HEIGHT,\
     WATER_COLOR,\
     FPS
 from level import Level
 import pygame
 import sys
-import os
-
-os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"


-class Game:
+class Pneuma:

     def __init__(self, show_pg=False, n_players=1,):
         print(f"Initializing Pneuma with {n_players} player(s).\
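The reordering in this hunk is functional, not cosmetic: pygame prints its support prompt at import time, so the PYGAME_HIDE_SUPPORT_PROMPT environment variable only suppresses the banner if it is set before pygame is first imported, which is why the assignment now sits at the very top of the file instead of after the imports.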
@@ -1,7 +1,7 @@
 import pygame

-from configs.game.weapon_config import weapon_data
-from configs.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data

 from .ui_settings import UI_FONT,\
     UI_FONT_SIZE,\
3  level.py

@@ -4,14 +4,13 @@ import numpy as np

 from random import choice

-from configs.system.window_config import TILESIZE
+from config.system.window import TILESIZE

 from utils.debug import debug
 from utils.resource_loader import import_csv_layout, import_folder

 from interface.ui import UI

-from entities.observer import Observer
 from entities.player import Player
 from entities.enemy import Enemy
 from entities.terrain import Terrain
185  main.py  Normal file

@@ -0,0 +1,185 @@
import os
import random
import torch as T
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm

import args
import folder_struct

import utils.seeds as seeds
import utils.metrics as metrics

from game import Pneuma


def main():

    parsed_args = args.parse_args()

    if not parsed_args.no_seed:
        seeds.set_seeds(parsed_args.seed)
        print(f"Seed set as {parsed_args.seed}")
    else:
        print("No seed set")

    chkpt_path, figure_path = folder_struct.setup_dirs()

    n_episodes = parsed_args.n_episodes
    episode_length = parsed_args.ep_length
    n_agents = parsed_args.n_agents

    horizon = parsed_args.horizon
    no_training = parsed_args.no_training

    learnings_per_episode = int(episode_length/horizon)
    learn_iters = 0

    show_pygame = parsed_args.show_pg

    # Setup AI metrics

    # Setup parameter monitoring
    score_history = np.zeros(
        shape=(parsed_args.n_agents, parsed_args.n_episodes))

    best_score = np.zeros(parsed_args.n_agents)

    actor_loss = np.zeros(shape=(parsed_args.n_agents,
                                 parsed_args.n_episodes))

    critic_loss = np.zeros(shape=(parsed_args.n_agents,
                                  parsed_args.n_episodes))

    total_loss = np.zeros(shape=(parsed_args.n_agents,
                                 parsed_args.n_episodes))

    entropy = np.zeros(shape=(parsed_args.n_agents,
                              parsed_args.n_episodes))

    advantage = np.zeros(shape=(parsed_args.n_agents,
                                parsed_args.n_episodes))

    # score_history, best_score, actor_loss, critic_loss, total_loss, entropy, advantage = metrics.generate(parsed_args)

    game = Pneuma(show_pg=show_pygame, n_players=parsed_args.n_agents)

    print("Initializing agents ...")
    for player in tqdm(game.level.player_sprites,
                       dynamic_ncols=True):
        player.setup_agent(
            gamma=parsed_args.gamma,
            alpha=parsed_args.alpha,
            policy_clip=parsed_args.policy_clip,
            batch_size=parsed_args.batch_size,
            n_epochs=parsed_args.n_epochs,
            gae_lambda=parsed_args.gae_lambda,
            entropy_coef=parsed_args.entropy_coeff,
            chkpt_dir=chkpt_path,
            no_load=True
        )

    # Episodes start
    for episode in tqdm(range(n_episodes),
                        dynamic_ncols=True):

        game.level.reset()

        episode_reward = np.zeros(
            shape=(n_agents, episode_length))

        episode_actor_loss = np.zeros(
            shape=(n_agents, learnings_per_episode))

        episode_critic_loss = np.zeros(
            shape=(n_agents, learnings_per_episode))

        episode_total_loss = np.zeros(
            shape=(n_agents, learnings_per_episode))

        # Main game loop
        for step in tqdm(range(episode_length),
                         leave=False,
                         ascii=True,
                         dynamic_ncols=True):

            if not game.level.done:
                game.run()

                for player in game.level.player_sprites:

                    episode_reward[player.player_id][step] = player.reward

                    if not no_training and ((step % horizon == 0 and step != 0) or player.is_dead()):

                        player.agent.learn()

                        episode_actor_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.actor_loss

                        episode_critic_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.critic_loss

                        episode_total_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.total_loss

                        learn_iters += 1

        # Gather information about the episode
        for player in game.level.player_sprites:

            score = np.mean(episode_reward[player.player_id])

            # Update score
            score_history[player.player_id][episode] = score

            # Update actor/critic loss
            actor_loss[player.player_id][episode] = np.mean(
                episode_actor_loss)

            critic_loss[player.player_id][episode] = np.mean(
                episode_critic_loss)

            total_loss[player.player_id][episode] = np.mean(
                episode_total_loss)

            # Check for new best score
            if score > best_score[player.player_id]:
                print(f"\nEpisode:\
                    {episode}\
                    \nNew best score for player {player.player_id}:\
                    {score}\
                    \nOld best score for player {player.player_id}: \
                    {best_score[player.player_id]}")

                best_score[player.player_id] = score

                print(f"Saving models for player {player.player_id}...")

                # Save models
                player.agent.save_models(
                    f"A{player.player_id}",
                    f"C{player.player_id}")

                print(f"Models saved to {chkpt_path}")

    metrics.plot_learning_curve(score_history, parsed_args.n_agents, figure_path)

    metrics.plot_loss('actor', actor_loss, parsed_args.n_agents, figure_path)

    metrics.plot_loss('critic', critic_loss, parsed_args.n_agents, figure_path)

    metrics.plot_parameter('entropy', entropy, parsed_args.n_agents, figure_path)

    metrics.plot_parameter('advantage', advantage, parsed_args.n_agents, figure_path)

    # End of training session
    print("End of episodes.\
        \nExiting game...")

    game.quit()


if __name__ == '__main__':
    main()
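One worked number for the loop above: learnings_per_episode = int(episode_length / horizon), so with the default --ep_length of 5000 and the default horizon of 2048 from HPARAMS each agent gets int(5000 / 2048) = 2 scheduled PPO updates per episode, plus any extra update triggered when a player dies.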
293  pneuma.py

@@ -1,293 +0,0 @@
import os
import random
import argparse
import torch as T
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm

from game import Game


if __name__ == "__main__":

    # Create parser
    parser = argparse.ArgumentParser(
        prog='Pneuma',
        description='A Reinforcement Learning platform made with PyGame'
    )

    # Add args
    parser.add_argument('--no_seed',
                        default=False,
                        action="store_true",
                        help="Set to True to run without a seed.")

    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help="The seed for the RNG.")

    parser.add_argument('--n_episodes',
                        type=int,
                        default=300,
                        help="Number of episodes.")

    parser.add_argument('--ep_length',
                        type=int,
                        default=5000,
                        help="Length of each episode.")

    parser.add_argument('--n_players',
                        type=int,
                        default=1,
                        help="Number of players.")

    parser.add_argument('--chkpt_path',
                        type=str,
                        default="agents/saved_models",
                        help="Save/load location for agent models.")

    parser.add_argument('--figure_path',
                        type=str,
                        default="figures",
                        help="Save location for figures.")

    parser.add_argument('--horizon',
                        type=int,
                        default=2048,
                        help="The number of steps per update")

    parser.add_argument('--show_pg',
                        default=False,
                        action="store_true",
                        help="Set to True to open PyGame window on desktop")

    parser.add_argument('--no_load',
                        default=False,
                        action="store_true",
                        help="Set to True to ignore saved models")

    parser.add_argument('--gamma',
                        type=float,
                        default=0.99,
                        help="The gamma parameter for PPO")

    parser.add_argument('--entropy',
                        type=float,
                        default=0.01,
                        help="The entropy coefficient")

    parser.add_argument('--alpha',
                        type=float,
                        default=0.0003,
                        help="The alpha parameter for PPO")

    parser.add_argument('--policy_clip',
                        type=float,
                        default=0.1,
                        help="The policy clip")

    parser.add_argument('--batch_size',
                        type=int,
                        default=128,
                        help="The size of each batch")

    parser.add_argument('--n_epochs',
                        type=int,
                        default=20,
                        help="The number of epochs")

    parser.add_argument('--gae_lambda',
                        type=float,
                        default=0.95,
                        help="The lambda parameter of the GAE")

    parser.add_argument('--no_training',
                        default=False,
                        action="store_true",
                        help="Decides if the algorithm should train.")

    args = parser.parse_args()

    random.seed(args.seed)
    np.random.seed(args.seed)
    T.manual_seed(args.seed)

    n_episodes = args.n_episodes
    episode_length = args.ep_length
    n_players = args.n_players

    home_folder = os.path.dirname(os.path.abspath(__file__))

    chkpt_path = os.path.join(home_folder, args.chkpt_path)
    figure_path = os.path.join(home_folder, args.figure_path)

    horizon = args.horizon
    no_training = args.no_training
    learnings_per_episode = int(episode_length/horizon)
    learn_iters = 0

    show_pygame = args.show_pg

    # Setup AI stuff
    score_history = np.zeros(shape=(n_players, n_episodes))

    best_score = np.zeros(n_players)

    actor_loss = np.zeros(shape=(n_players,
                                 n_episodes))

    critic_loss = np.zeros(shape=(n_players,
                                  n_episodes))

    total_loss = np.zeros(shape=(n_players,
                                 n_episodes))

    game = Game(show_pg=show_pygame, n_players=n_players)

    print("Initializing agents ...")
    for player in tqdm(game.level.player_sprites,
                       dynamic_ncols=True):
        player.setup_agent(
            gamma=args.gamma,
            alpha=args.alpha,
            policy_clip=args.policy_clip,
            batch_size=args.batch_size,
            n_epochs=args.n_epochs,
            gae_lambda=args.gae_lambda,
            entropy_coef=args.entropy,
            chkpt_dir=chkpt_path,
            no_load=args.no_load
        )

    # Episodes start
    for episode in tqdm(range(n_episodes),
                        dynamic_ncols=True):

        game.level.reset()

        episode_reward = np.zeros(
            shape=(n_players, episode_length))

        episode_actor_loss = np.zeros(
            shape=(n_players, learnings_per_episode))

        episode_critic_loss = np.zeros(
            shape=(n_players, learnings_per_episode))

        episode_total_loss = np.zeros(
            shape=(n_players, learnings_per_episode))

        # Main game loop
        for step in tqdm(range(episode_length),
                         leave=False,
                         ascii=True,
                         dynamic_ncols=True):

            if not game.level.done:
                game.run()

                for player in game.level.player_sprites:

                    episode_reward[player.player_id][step] = player.reward

                    if not no_training and ((step % horizon == 0 and step != 0) or player.is_dead()):

                        player.agent.learn()

                        episode_actor_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.actor_loss

                        episode_critic_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.critic_loss

                        episode_total_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.total_loss

                        learn_iters += 1

        # Gather information about the episode
        for player in game.level.player_sprites:

            score = np.mean(episode_reward[player.player_id])

            # Update score
            score_history[player.player_id][episode] = score

            # Update actor/critic loss
            actor_loss[player.player_id][episode] = np.mean(
                episode_actor_loss)

            critic_loss[player.player_id][episode] = np.mean(
                episode_critic_loss)

            total_loss[player.player_id][episode] = np.mean(
                episode_total_loss)

            # Check for new best score
            if score > best_score[player.player_id]:
                print(f"\nEpisode:\
                    {episode}\
                    \nNew best score for player {player.player_id}:\
                    {score}\
                    \nOld best score for player {player.player_id}: \
                    {best_score[player.player_id]}")

                best_score[player.player_id] = score

                print(f"Saving models for player {player.player_id}...")

                # Save models
                player.agent.save_models(
                    f"A{player.player_id}",
                    f"C{player.player_id}")

                print(f"Models saved to {chkpt_path}")

    plt.figure()
    plt.title("Agent Rewards")
    plt.xlabel("Episode")
    plt.ylabel("Score")
    plt.legend([f"Agent {num}" for num in range(n_players)])
    for player_score in score_history:
        plt.plot(player_score)
    plt.savefig(os.path.join(figure_path, 'score.png'))
    plt.close()

    plt.figure()
    plt.suptitle("Actor Loss")
    plt.xlabel("Episode")
    plt.ylabel("Loss")
    plt.legend([f"Agent {num}" for num in range(n_players)])
    for actor in actor_loss:
        plt.plot(actor)
    plt.savefig(os.path.join(figure_path, 'actor_loss.png'))
    plt.close()

    plt.figure()
    plt.suptitle("Critic Loss")
    plt.xlabel("Episode")
    plt.ylabel("Loss")
    plt.legend([f"Agent {num}" for num in range(n_players)])
    for critic in critic_loss:
        plt.plot(critic)
    plt.savefig(os.path.join(figure_path, 'critic_loss.png'))
    plt.close()

    plt.figure()
    plt.suptitle("Total Loss")
    plt.xlabel("Episode")
    plt.ylabel("Loss")
    plt.legend([f"Agent {num}" for num in range(n_players)])
    for total in total_loss:
        plt.plot(total)
    plt.savefig(os.path.join(figure_path, 'total_loss.png'))
    plt.close()

    # End of training session
    print("End of episodes.\
        \nExiting game...")

    game.quit()
16  utils/hyperparams.py  Normal file

@@ -0,0 +1,16 @@
HPARAMS = {

    "horizon": 2048,
    "num_epochs": 15,
    "batch_size": 128,

    "policy_clip": 0.1,
    "discount_factor": 0.99,
    "GAE_lambda": 0.95,

    "entropy_coeff": 0.01,
    "value_coeff": 0.5,

    "learning_rate": 0.0003,
}
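These values are only defaults: every key except value_coeff is wired to a CLI flag in args.py, so a single run can override them without editing this file, e.g. python main.py --horizon 1024 --batch_size 64. value_coeff is defined here but not exposed as a flag in the args.py shown above.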
92  utils/metrics.py  Normal file

@@ -0,0 +1,92 @@
import os
import numpy as np
import matplotlib.pyplot as plt


def generate(parsed_args):

    # Setup parameter monitoring
    score_history = np.zeros(
        shape=(parsed_args.n_agents, parsed_args.n_episodes))

    best_score = np.zeros(parsed_args.n_agents)

    actor_loss = np.zeros(shape=(parsed_args.n_agents,
                                 parsed_args.n_episodes))

    critic_loss = np.zeros(shape=(parsed_args.n_agents,
                                  parsed_args.n_episodes))

    total_loss = np.zeros(shape=(parsed_args.n_agents,
                                 parsed_args.n_episodes))

    entropy = np.zeros(shape=(parsed_args.n_agents,
                              parsed_args.n_episodes))

    advantage = np.zeros(shape=(parsed_args.n_agents,
                                parsed_args.n_episodes))

    # Parentheses keep this a single return statement; without them the
    # statement would end after the first three values and the rest
    # would be unreachable expressions.
    return (score_history, best_score, actor_loss,
            critic_loss, total_loss, entropy,
            advantage)


def plot_learning_curve(scores, num_players, figure_path):

    plt.figure()
    plt.title("Running Average - Score")
    plt.xlabel("Episode")
    plt.ylabel("Score")
    for score in scores:
        running_avg = np.zeros(len(score))
        for i in range(len(score)):
            running_avg[i] = np.mean(score[max(0, i-100):(i+1)])
        plt.plot(running_avg)
    # Legend is added after plotting so the labels attach to the plotted lines.
    plt.legend([f"Agent {num}" for num in range(num_players)])
    plt.savefig(os.path.join(figure_path, "avg_score.png"))
    plt.close()


def plot_score(scores, num_players, figure_path):

    plt.figure()
    plt.title("Agent Rewards - No Averaging")
    plt.xlabel("Episode")
    plt.ylabel("Score")
    for player_score in scores:
        plt.plot(player_score)
    plt.legend([f"Agent {num}" for num in range(num_players)])
    plt.savefig(os.path.join(figure_path, 'score.png'))
    plt.close()


def plot_loss(nn_type, losses, num_players, figure_path):

    plt.figure()
    plt.title(f"Running Average - {nn_type.capitalize()} Loss")
    plt.xlabel("Learning Iterations")
    plt.ylabel("Loss")
    for loss in losses:
        running_avg = np.zeros(len(loss))
        for i in range(len(loss)):
            running_avg[i] = np.mean(loss[max(0, i-100):(i+1)])
        plt.plot(running_avg)
    plt.legend([f"Agent {num}" for num in range(num_players)])
    plt.savefig(os.path.join(figure_path, f"{nn_type}_loss.png"))
    plt.close()


def plot_parameter(name, parameter, num_players, figure_path):

    plt.figure()
    plt.title(f"Running Average - {name.capitalize()}")
    plt.xlabel("Learning Iterations")
    plt.ylabel(f"{name.capitalize()}")
    for param in parameter:
        running_avg = np.zeros(len(param))
        for i in range(len(param)):
            running_avg[i] = np.mean(param[max(0, i-100):(i+1)])
        plt.plot(running_avg)
    plt.legend([f"Agent {num}" for num in range(num_players)])
    plt.savefig(os.path.join(figure_path, f"{name}.png"))
    plt.close()
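Each plot_* helper above smooths its series with a trailing running average over roughly the last 100 points (np.mean(x[max(0, i-100):(i+1)])), so early in a curve the average is taken over however many values exist so far; at i = 5, for example, the window is x[0:6].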
11  utils/seeds.py  Normal file

@@ -0,0 +1,11 @@
import random
import torch as T
import numpy as np


def set_seeds(value):

    random.seed(value)
    np.random.seed(value)
    T.manual_seed(value)
    T.cuda.manual_seed_all(value)
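A minimal usage sketch, mirroring what main.py does: import utils.seeds as seeds and call seeds.set_seeds(parsed_args.seed) once at startup to seed Python's random, NumPy, and PyTorch together. The torch.cuda call is applied lazily only if CUDA is initialized, so the helper should also be safe on CPU-only machines, though bitwise-reproducible GPU runs would need additional torch.backends settings that this module does not touch.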