Big update
BIN  .DS_Store  (vendored)
BIN  agents/.DS_Store  (vendored)
BIN  agents/ppo/.DS_Store  (vendored)
@@ -1 +0,0 @@
-# This is a folder with all the saved models.
91  args.py  (new file)
@@ -0,0 +1,91 @@
import argparse

from utils.hyperparams import HPARAMS


def parse_args():

    parser = argparse.ArgumentParser(
        prog='Pneuma',
        description='A Reinforcement Learning platform made with PyGame'
    )

    # Define seed
    parser.add_argument('--no_seed',
                        default=False,
                        action="store_true",
                        help="Set to True to run without a seed.")

    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help="The seed for the RNG.")

    # Define episodes and agents
    parser.add_argument('--n_episodes',
                        type=int,
                        default=300,
                        help="Number of episodes.")

    parser.add_argument('--ep_length',
                        type=int,
                        default=5000,
                        help="Length of each episode.")

    parser.add_argument('--n_agents',
                        type=int,
                        default=1,
                        help="Number of agents.")

    # Define hyperparameters
    parser.add_argument('--horizon',
                        type=int,
                        default=HPARAMS["horizon"],
                        help="The number of steps per update")

    parser.add_argument('--gamma',
                        type=float,
                        default=HPARAMS["discount_factor"],
                        help="The discount factor for PPO")

    parser.add_argument('--entropy_coeff',
                        type=float,
                        default=HPARAMS["entropy_coeff"],
                        help="The entropy coefficient")

    parser.add_argument('--alpha',
                        type=float,
                        default=HPARAMS["learning_rate"],
                        help="The learning_rate for PPO")

    parser.add_argument('--policy_clip',
                        type=float,
                        default=HPARAMS["policy_clip"],
                        help="The policy clip for PPO")

    parser.add_argument('--batch_size',
                        type=int,
                        default=HPARAMS["batch_size"],
                        help="The size of each batch")

    parser.add_argument('--n_epochs',
                        type=int,
                        default=HPARAMS["num_epochs"],
                        help="The number of epochs")

    parser.add_argument('--gae_lambda',
                        type=float,
                        default=HPARAMS["GAE_lambda"],
                        help="The lambda parameter of the GAE")

    # Misc
    parser.add_argument('--no_training',
                        default=False,
                        action="store_true",
                        help="Set flag to disable learning. Useful for viewing trained agents interact in the environment.")

    parser.add_argument('--show_pg',
                        default=False,
                        action="store_true",
                        help="Set flag to open a PyGame window on desktop")

    return parser.parse_args()
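As a rough sketch of how these definitions behave (parse_args reads sys.argv, and the hyperparameter defaults come from utils/hyperparams.py further down in this diff; the printed values simply restate those defaults):

# Illustrative only: simulate running the new entry point with no CLI flags.
import sys
sys.argv = ["main.py"]

from args import parse_args
parsed = parse_args()

print(parsed.horizon)    # 2048    (HPARAMS["horizon"])
print(parsed.gamma)      # 0.99    (HPARAMS["discount_factor"])
print(parsed.alpha)      # 0.0003  (HPARAMS["learning_rate"])
print(parsed.n_epochs)   # 15      (HPARAMS["num_epochs"])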
BIN  chkpts/run2/A0  (new file)
BIN  chkpts/run2/C0  (new file)
BIN  configs/.DS_Store  (vendored)
@@ -1,7 +1,7 @@
 import pygame
 from random import randint

-from configs.system.window_config import TILESIZE
+from config.system.window import TILESIZE


 class MagicPlayer:
@@ -1,8 +1,8 @@
 import pygame
 from random import randint, choice

-from configs.game.spell_config import magic_data
-from configs.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data

 from .movement import MovementHandler
 from .combat import CombatHandler
@@ -4,7 +4,7 @@ from math import sin

 from utils.resource_loader import import_folder, import_assets

-from configs.system.window_config import HITBOX_OFFSET
+from config.system.window import HITBOX_OFFSET


 class AnimationHandler:
@@ -1,8 +1,8 @@
 from effects.weapon_effects import Weapon
 from effects.magic_effects import MagicPlayer

-from configs.game.weapon_config import weapon_data
-from configs.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data


 class CombatHandler:
@@ -1,5 +1,5 @@
-from configs.game.player_config import warrior_stats, mage_stats, tank_stats
-from configs.game.monster_config import monster_data
+from config.game.player_config import warrior_stats, mage_stats, tank_stats
+from config.game.monster_config import monster_data


 class StatsHandler:
@@ -1,6 +1,6 @@
 import pygame

-from .components.animaton import AnimationHandler
+from .components.animation import AnimationHandler
 from .components.stats import StatsHandler
 from .components._input import InputHandler

@@ -2,16 +2,16 @@ import pygame
 import numpy as np
 from random import randint

-from configs.game.weapon_config import weapon_data
-from configs.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data

 from .components.stats import StatsHandler
 from .components._input import InputHandler
-from .components.animaton import AnimationHandler
+from .components.animation import AnimationHandler

 from effects.particle_effects import AnimationPlayer

-from agents.ppo.agent import Agent
+from ml.ppo.agent import Agent


 class Player(pygame.sprite.Sprite):
@@ -1,257 +0,0 @@
import pygame
import numpy as np
from random import randint

from configs.game.weapon_config import weapon_data
from configs.game.spell_config import magic_data

from .components.stats import StatsHandler
from .components._input import InputHandler
from .components.animaton import AnimationHandler

from effects.particle_effects import AnimationPlayer

from agents.ppo.agent import Agent


class Player(pygame.sprite.Sprite):
    def __init__(self,
                 player_id,
                 role,
                 position,
                 map_edge,
                 groups,
                 obstacle_sprites,
                 visible_sprites,
                 attack_sprites,
                 attackable_sprites
                 ):
        super().__init__(groups)

        self.initial_position = position
        self.map_edge = map_edge
        self.player_id = player_id
        self.distance_direction_from_enemy = None

        # Sprite Setup
        self.sprite_type = "player"
        self.obstacle_sprites = obstacle_sprites
        self.visible_sprites = visible_sprites
        self.attack_sprites = attack_sprites
        self.attackable_sprites = attackable_sprites

        # Graphics Setup
        self.animation_player = AnimationPlayer()
        self.animation = AnimationHandler(self.sprite_type)
        self.animation.import_assets(position)
        # Input Setup
        self._input = InputHandler(
            self.sprite_type, self.animation_player)

        # Setup Stats
        self.role = role
        self.stats = StatsHandler(self.sprite_type, self.role)

    def setup_agent(self,
                    gamma,
                    alpha,
                    policy_clip,
                    batch_size,
                    n_epochs,
                    gae_lambda,
                    chkpt_dir,
                    entropy_coef,
                    no_load=False):

        self.max_num_enemies = len(self.distance_direction_from_enemy)
        self.get_current_state()
        self.num_features = len(self.state_features)

        self.agent = Agent(
            input_dims=self.num_features,
            n_actions=len(self._input.possible_actions),
            gamma=gamma,
            alpha=alpha,
            policy_clip=policy_clip,
            batch_size=batch_size,
            n_epochs=n_epochs,
            gae_lambda=gae_lambda,
            entropy_coef=entropy_coef,
            chkpt_dir=chkpt_dir
        )
        print(
            f"\nAgent initialized on player {self.player_id} using {self.agent.actor.device}.")

        if not no_load:
            print("Attempting to load models ...")
            try:
                self.agent.load_models(
                    actr_chkpt=f"A{self.player_id}",
                    crtc_chkpt=f"C{self.player_id}"
                )
                print("Models loaded ...\n")

            except FileNotFoundError:
                print(
                    f"FileNotFound for player {self.player_id}.\
                    \nSkipping loading ...\n")

    def get_status(self):
        if self._input.movement.direction.x == 0\
                and self._input.movement.direction.y == 0:

            if 'idle' not in self._input.status and 'attack' not in self._input.status:
                self._input.status += '_idle'

        if self._input.attacking:
            self._input.movement.direction.x = 0
            self._input.movement.direction.y = 0
            if 'attack' not in self._input.status:
                if 'idle' in self._input.status:
                    self._input.status = self._input.status.replace(
                        'idle', 'attack')
                else:
                    self._input.status += '_attack'
        else:
            if 'attack' in self._input.status:
                self._input.status = self._input.status.replace('_attack', '')

    def attack_logic(self):
        if self.attack_sprites:
            for attack_sprite in self.attack_sprites:
                collision_sprites = pygame.sprite.spritecollide(
                    attack_sprite, self.attackable_sprites, False)
                if collision_sprites:
                    for target_sprite in collision_sprites:
                        if target_sprite.sprite_type == 'grass':
                            pos = target_sprite.rect.center
                            offset = pygame.math.Vector2(0, 75)
                            for leaf in range(randint(3, 6)):
                                self.animation_player.create_grass_particles(
                                    position=pos - offset,
                                    groups=[self.visible_sprites])

                            target_sprite.kill()
                        else:
                            target_sprite.get_damaged(
                                self, attack_sprite.sprite_type)

    def get_full_weapon_damage(self):
        base_damage = self.stats.attack
        weapon_damage = weapon_data[self._input.combat.weapon]['damage']
        return (base_damage + weapon_damage)

    def get_full_magic_damage(self):
        base_damage = self.stats.magic
        spell_damage = magic_data[self._input.combat.magic]['strength']
        return (base_damage + spell_damage)

    def get_reward(self):

        self.reward = 0

        # Base reward on player exp
        self.reward += self.stats.exp
        print(f'Player exp added to reward: {self.stats.exp} -> {self.reward}')

        # Add relative hp of player
        self.reward += self.stats.health/self.stats.stats['health']
        print(f"Player hp added to reward: {self.stats.health/self.stats.stats['health']} -> {self.reward}")

        # Take into account distance of nearest enemy from player relative to the map length
        self.reward -= self.nearest_dist/np.sqrt(np.sum(self.map_edge))
        print(f'Relative distance of enemy: {self.nearest_dist/np.sqrt(np.sum(self.map_edge))} -> {self.reward}')

        # Take into account nearest enemy relative health
        self.reward -= self.nearest_enemy.stats.health/self.nearest_enemy.stats.monster_info['health']
        print(f"Enemy hp added: {self.nearest_enemy.stats.health/self.nearest_enemy.stats.monster_info['health']} -> {self.reward}")

    def get_current_state(self):

        if self.distance_direction_from_enemy != []:
            sorted_distances = sorted(
                self.distance_direction_from_enemy, key=lambda x: x[0])
        else:
            sorted_distances = np.zeros(self.num_features)

        self.nearest_dist, _, self.nearest_enemy = sorted_distances[0]

        self.action_features = [self._input.action]

        self.get_reward()

        self.state_features = [
            self.animation.rect.center[0]/self.map_edge[0],
            self.animation.rect.center[1]/self.map_edge[1],
            self._input.movement.direction.x,
            self._input.movement.direction.y,
            self.stats.health/self.stats.stats['health'],
            self.stats.energy/self.stats.stats['energy'],
            1 if 'attack' in self._input.status else 0,

        ]

        for distance, direction, enemy in self.distance_direction_from_enemy:
            self.state_features.extend([

                distance/np.sqrt(np.sum(self.map_edge)),

                direction[0],

                direction[1],

                enemy.stats.health /
                enemy.stats.monster_info['health'],

                enemy.stats.exp,
            ])

        if hasattr(self, 'num_features'):
            while len(self.state_features) < self.num_features:
                self.state_features.append(0)

        self.state_features = np.array(self.state_features)

    def is_dead(self):
        if self.stats.health <= 0:
            self.stats.health = 0
            self.animation.import_assets((3264, 448))
            return True
        else:
            return False

    def agent_update(self):

        # Get the current state
        self.get_current_state()

        # Choose action based on current state
        action, probs, value\
            = self.agent.choose_action(self.state_features)

        # Apply chosen action
        self._input.check_input(action,
                                self.stats.speed,
                                self.animation.hitbox,
                                self.obstacle_sprites,
                                self.animation.rect,
                                self)

        self.agent.remember(self.state_features, action,
                            probs, value, self.reward, self.is_dead())

        self.get_current_state()

    def update(self):

        self.agent_update()

        # Cooldowns and Regen
        self.stats.health_recovery()
        self.stats.energy_recovery()

        # Refresh player based on input and animate
        self.get_status()
        self.animation.animate(
            self._input.status, self._input.combat.vulnerable)
        self._input.cooldowns(self._input.combat.vulnerable)
@@ -1,6 +1,6 @@
 import pygame

-from configs.system.window_config import TILESIZE,\
+from config.system.window import TILESIZE,\
     HITBOX_OFFSET


BIN  six figure images deleted (22 KiB, 46 KiB, 56 KiB, 26 KiB, 36 KiB, 31 KiB)
BIN  figures/run1/actor_loss.png   (new file, 20 KiB)
BIN  figures/run1/advantage.png    (new file, 18 KiB)
BIN  figures/run1/avg_score.png    (new file, 19 KiB)
BIN  figures/run1/critic_loss.png  (new file, 18 KiB)
BIN  figures/run1/entropy.png      (new file, 17 KiB)
BIN  figures/run3/actor_loss.png   (new file, 21 KiB)
BIN  figures/run3/advantage.png    (new file, 18 KiB)
BIN  figures/run3/avg_score.png    (new file, 21 KiB)
BIN  figures/run3/critic_loss.png  (new file, 26 KiB)
BIN  figures/run3/entropy.png      (new file, 17 KiB)
BIN  six figure images deleted (21 KiB, 51 KiB, 49 KiB, 26 KiB, 36 KiB, 30 KiB)
30  folder_struct.py  (new file)
@@ -0,0 +1,30 @@
import os


def set_directories(base_path):

    if not os.path.exists(base_path):
        os.makedirs(base_path)

    trial_dirs = [directory for directory in os.listdir(
        base_path) if os.path.isdir(os.path.join(base_path, directory))]
    trial_nums = sorted([int(directory[-1])
                         for directory in trial_dirs if directory.startswith("run") and directory[-1].isdigit()])
    next_trial_num = trial_nums[-1] + 1 if trial_nums else 1
    new_trial_path = os.path.join(base_path, f"run{next_trial_num}")

    os.makedirs(new_trial_path)
    return new_trial_path


def setup_dirs():

    home_folder = os.path.dirname(os.path.abspath(__file__))

    chkpt_path = os.path.join(home_folder, 'chkpts')
    chkpt_path = set_directories(chkpt_path)

    figure_path = os.path.join(home_folder, 'figures')
    figure_path = set_directories(figure_path)

    return chkpt_path, figure_path
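A quick usage sketch (paths illustrative): each call to setup_dirs() creates the next numbered run directory under chkpts/ and figures/, which is where the chkpts/run2 and figures/run1, figures/run3 entries above come from. Note that set_directories only inspects the last character of each directory name, so numbering past run9 would not increment as expected; a hypothetical multi-digit-safe variant is sketched alongside.

import folder_struct

# First call on a fresh clone -> (".../chkpts/run1", ".../figures/run1"); the next call -> run2, and so on.
chkpt_path, figure_path = folder_struct.setup_dirs()


def next_run_number(trial_dirs):
    # Hypothetical variant of the numbering logic that parses the full numeric suffix.
    nums = [int(d[3:]) for d in trial_dirs if d.startswith("run") and d[3:].isdigit()]
    return max(nums) + 1 if nums else 1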
13  game.py
@@ -1,16 +1,19 @@
-from configs.system.window_config import WIDTH,\
+import os
+os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
+
+from config.system.window import WIDTH,\
     HEIGHT,\
     WATER_COLOR,\
     FPS
 from level import Level
 import pygame
 import sys
-import os
-
-os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"


-class Game:
+class Pneuma:

     def __init__(self, show_pg=False, n_players=1,):
         print(f"Initializing Pneuma with {n_players} player(s).\
@@ -1,7 +1,7 @@
 import pygame

-from configs.game.weapon_config import weapon_data
-from configs.game.spell_config import magic_data
+from config.game.weapon_config import weapon_data
+from config.game.spell_config import magic_data

 from .ui_settings import UI_FONT,\
     UI_FONT_SIZE,\
3  level.py
@@ -4,14 +4,13 @@ import numpy as np

 from random import choice

-from configs.system.window_config import TILESIZE
+from config.system.window import TILESIZE

 from utils.debug import debug
 from utils.resource_loader import import_csv_layout, import_folder

 from interface.ui import UI

 from entities.observer import Observer
 from entities.player import Player
 from entities.enemy import Enemy
 from entities.terrain import Terrain
185  main.py  (new file)
@@ -0,0 +1,185 @@
import os
import random
import torch as T
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm

import args
import folder_struct

import utils.seeds as seeds
import utils.metrics as metrics

from game import Pneuma


def main():

    parsed_args = args.parse_args()

    if not parsed_args.no_seed:
        seeds.set_seeds(parsed_args.seed)
        print(f"Seed set as {parsed_args.seed}")
    else:
        print("No seed set")

    chkpt_path, figure_path = folder_struct.setup_dirs()

    n_episodes = parsed_args.n_episodes
    episode_length = parsed_args.ep_length
    n_agents = parsed_args.n_agents

    horizon = parsed_args.horizon
    no_training = parsed_args.no_training

    learnings_per_episode = int(episode_length/horizon)
    learn_iters = 0

    show_pygame = parsed_args.show_pg

    # Setup AI metrics

    # Setup parameter monitoring
    score_history = np.zeros(
        shape=(parsed_args.n_agents, parsed_args.n_episodes))

    best_score = np.zeros(parsed_args.n_agents)

    actor_loss = np.zeros(shape=(parsed_args.n_agents,
                                 parsed_args.n_episodes))

    critic_loss = np.zeros(shape=(parsed_args.n_agents,
                                  parsed_args.n_episodes))

    total_loss = np.zeros(shape=(parsed_args.n_agents,
                                 parsed_args.n_episodes))

    entropy = np.zeros(shape=(parsed_args.n_agents,
                              parsed_args.n_episodes))

    advantage = np.zeros(shape=(parsed_args.n_agents,
                                parsed_args.n_episodes))

    # score_history, best_score, actor_loss, critic_loss, total_loss, entropy, advantage = metrics.generate(parsed_args)

    game = Pneuma(show_pg=show_pygame, n_players=parsed_args.n_agents)

    print("Initializing agents ...")
    for player in tqdm(game.level.player_sprites,
                       dynamic_ncols=True):
        player.setup_agent(
            gamma=parsed_args.gamma,
            alpha=parsed_args.alpha,
            policy_clip=parsed_args.policy_clip,
            batch_size=parsed_args.batch_size,
            n_epochs=parsed_args.n_epochs,
            gae_lambda=parsed_args.gae_lambda,
            entropy_coef=parsed_args.entropy_coeff,
            chkpt_dir=chkpt_path,
            no_load=True
        )

    # Episodes start
    for episode in tqdm(range(n_episodes),
                        dynamic_ncols=True):

        game.level.reset()

        episode_reward = np.zeros(
            shape=(n_agents, episode_length))

        episode_actor_loss = np.zeros(
            shape=(n_agents, learnings_per_episode))

        episode_critic_loss = np.zeros(
            shape=(n_agents, learnings_per_episode))

        episode_total_loss = np.zeros(
            shape=(n_agents, learnings_per_episode))

        # Main game loop
        for step in tqdm(range(episode_length),
                         leave=False,
                         ascii=True,
                         dynamic_ncols=True):

            if not game.level.done:
                game.run()

                for player in game.level.player_sprites:

                    episode_reward[player.player_id][step] = player.reward

                    if not no_training and ((step % horizon == 0 and step != 0) or player.is_dead()):

                        player.agent.learn()

                        episode_actor_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.actor_loss

                        episode_critic_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.critic_loss

                        episode_total_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.total_loss

                        learn_iters += 1

        # Gather information about the episode
        for player in game.level.player_sprites:

            score = np.mean(episode_reward[player.player_id])

            # Update score
            score_history[player.player_id][episode] = score

            # Update actor/critic loss
            actor_loss[player.player_id][episode] = np.mean(
                episode_actor_loss)

            critic_loss[player.player_id][episode] = np.mean(
                episode_critic_loss)

            total_loss[player.player_id][episode] = np.mean(
                episode_total_loss)

            # Check for new best score
            if score > best_score[player.player_id]:
                print(f"\nEpisode:\
                    {episode}\
                    \nNew best score for player {player.player_id}:\
                    {score}\
                    \nOld best score for player {player.player_id}: \
                    {best_score[player.player_id]}")

                best_score[player.player_id] = score

                print(f"Saving models for player {player.player_id}...")

                # Save models
                player.agent.save_models(
                    f"A{player.player_id}",
                    f"C{player.player_id}")

                print(f"Models saved to {chkpt_path}")

    metrics.plot_learning_curve(score_history, parsed_args.n_agents, figure_path)

    metrics.plot_loss('actor', actor_loss, parsed_args.n_agents, figure_path)

    metrics.plot_loss('critic', critic_loss, parsed_args.n_agents, figure_path)

    metrics.plot_parameter('entropy', entropy, parsed_args.n_agents, figure_path)

    metrics.plot_parameter('advantage', advantage, parsed_args.n_agents, figure_path)

    # End of training session
    print("End of episodes.\
        \nExiting game...")

    game.quit()


if __name__ == '__main__':
    main()
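For orientation, a small worked example of the update bookkeeping above, using the defaults from args.py (ep_length=5000, horizon=2048); the values follow directly from the arithmetic:

episode_length = 5000
horizon = 2048

learnings_per_episode = int(episode_length / horizon)
learning_steps = [s for s in range(episode_length) if s % horizon == 0 and s != 0]

print(learnings_per_episode)   # 2
print(learning_steps)          # [2048, 4096] -> two PPO updates per agent per episode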
293  pneuma.py
@@ -1,293 +0,0 @@
import os
import random
import argparse
import torch as T
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm

from game import Game


if __name__ == "__main__":

    # Create parser
    parser = argparse.ArgumentParser(
        prog='Pneuma',
        description='A Reinforcement Learning platform made with PyGame'
    )

    # Add args
    parser.add_argument('--no_seed',
                        default=False,
                        action="store_true",
                        help="Set to True to run without a seed.")

    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help="The seed for the RNG.")

    parser.add_argument('--n_episodes',
                        type=int,
                        default=300,
                        help="Number of episodes.")

    parser.add_argument('--ep_length',
                        type=int,
                        default=5000,
                        help="Length of each episode.")

    parser.add_argument('--n_players',
                        type=int,
                        default=1,
                        help="Number of players.")

    parser.add_argument('--chkpt_path',
                        type=str,
                        default="agents/saved_models",
                        help="Save/load location for agent models.")

    parser.add_argument('--figure_path',
                        type=str,
                        default="figures",
                        help="Save location for figures.")

    parser.add_argument('--horizon',
                        type=int,
                        default=2048,
                        help="The number of steps per update")

    parser.add_argument('--show_pg',
                        default=False,
                        action="store_true",
                        help="Set to True to open PyGame window on desktop")

    parser.add_argument('--no_load',
                        default=False,
                        action="store_true",
                        help="Set to True to ignore saved models")

    parser.add_argument('--gamma',
                        type=float,
                        default=0.99,
                        help="The gamma parameter for PPO")

    parser.add_argument('--entropy',
                        type=float,
                        default=0.01,
                        help="The entropy coefficient")

    parser.add_argument('--alpha',
                        type=float,
                        default=0.0003,
                        help="The alpha parameter for PPO")

    parser.add_argument('--policy_clip',
                        type=float,
                        default=0.1,
                        help="The policy clip")

    parser.add_argument('--batch_size',
                        type=int,
                        default=128,
                        help="The size of each batch")

    parser.add_argument('--n_epochs',
                        type=int,
                        default=20,
                        help="The number of epochs")

    parser.add_argument('--gae_lambda',
                        type=float,
                        default=0.95,
                        help="The lambda parameter of the GAE")

    parser.add_argument('--no_training',
                        default=False,
                        action="store_true",
                        help="Decides if the algorithm should train.")

    args = parser.parse_args()

    random.seed(args.seed)
    np.random.seed(args.seed)
    T.manual_seed(args.seed)

    n_episodes = args.n_episodes
    episode_length = args.ep_length
    n_players = args.n_players

    home_folder = os.path.dirname(os.path.abspath(__file__))

    chkpt_path = os.path.join(home_folder, args.chkpt_path)
    figure_path = os.path.join(home_folder, args.figure_path)

    horizon = args.horizon
    no_training = args.no_training
    learnings_per_episode = int(episode_length/horizon)
    learn_iters = 0

    show_pygame = args.show_pg

    # Setup AI stuff
    score_history = np.zeros(shape=(n_players, n_episodes))

    best_score = np.zeros(n_players)

    actor_loss = np.zeros(shape=(n_players,
                                 n_episodes))

    critic_loss = np.zeros(shape=(n_players,
                                  n_episodes))

    total_loss = np.zeros(shape=(n_players,
                                 n_episodes))

    game = Game(show_pg=show_pygame, n_players=n_players)

    print("Initializing agents ...")
    for player in tqdm(game.level.player_sprites,
                       dynamic_ncols=True):
        player.setup_agent(
            gamma=args.gamma,
            alpha=args.alpha,
            policy_clip=args.policy_clip,
            batch_size=args.batch_size,
            n_epochs=args.n_epochs,
            gae_lambda=args.gae_lambda,
            entropy_coef=args.entropy,
            chkpt_dir=chkpt_path,
            no_load=args.no_load
        )

    # Episodes start
    for episode in tqdm(range(n_episodes),
                        dynamic_ncols=True):

        game.level.reset()

        episode_reward = np.zeros(
            shape=(n_players, episode_length))

        episode_actor_loss = np.zeros(
            shape=(n_players, learnings_per_episode))

        episode_critic_loss = np.zeros(
            shape=(n_players, learnings_per_episode))

        episode_total_loss = np.zeros(
            shape=(n_players, learnings_per_episode))

        # Main game loop
        for step in tqdm(range(episode_length),
                         leave=False,
                         ascii=True,
                         dynamic_ncols=True):

            if not game.level.done:
                game.run()

                for player in game.level.player_sprites:

                    episode_reward[player.player_id][step] = player.reward

                    if not no_training and ((step % horizon == 0 and step != 0) or player.is_dead()):

                        player.agent.learn()

                        episode_actor_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.actor_loss

                        episode_critic_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.critic_loss

                        episode_total_loss[player.player_id][learn_iters % learnings_per_episode]\
                            = player.agent.total_loss

                        learn_iters += 1

        # Gather information about the episode
        for player in game.level.player_sprites:

            score = np.mean(episode_reward[player.player_id])

            # Update score
            score_history[player.player_id][episode] = score

            # Update actor/critic loss
            actor_loss[player.player_id][episode] = np.mean(
                episode_actor_loss)

            critic_loss[player.player_id][episode] = np.mean(
                episode_critic_loss)

            total_loss[player.player_id][episode] = np.mean(
                episode_total_loss)

            # Check for new best score
            if score > best_score[player.player_id]:
                print(f"\nEpisode:\
                    {episode}\
                    \nNew best score for player {player.player_id}:\
                    {score}\
                    \nOld best score for player {player.player_id}: \
                    {best_score[player.player_id]}")

                best_score[player.player_id] = score

                print(f"Saving models for player {player.player_id}...")

                # Save models
                player.agent.save_models(
                    f"A{player.player_id}",
                    f"C{player.player_id}")

                print(f"Models saved to {chkpt_path}")

    plt.figure()
    plt.title("Agent Rewards")
    plt.xlabel("Episode")
    plt.ylabel("Score")
    plt.legend([f"Agent {num}" for num in range(n_players)])
    for player_score in score_history:
        plt.plot(player_score)
    plt.savefig(os.path.join(figure_path, 'score.png'))
    plt.close()

    plt.figure()
    plt.suptitle("Actor Loss")
    plt.xlabel("Episode")
    plt.ylabel("Loss")
    plt.legend([f"Agent {num}" for num in range(n_players)])
    for actor in actor_loss:
        plt.plot(actor)
    plt.savefig(os.path.join(figure_path, 'actor_loss.png'))
    plt.close()

    plt.figure()
    plt.suptitle("Critic Loss")
    plt.xlabel("Episode")
    plt.ylabel("Loss")
    plt.legend([f"Agent {num}" for num in range(n_players)])
    for critic in critic_loss:
        plt.plot(critic)
    plt.savefig(os.path.join(figure_path, 'critic_loss.png'))
    plt.close()

    plt.figure()
    plt.suptitle("Total Loss")
    plt.xlabel("Episode")
    plt.ylabel("Loss")
    plt.legend([f"Agent {num}" for num in range(n_players)])
    for total in total_loss:
        plt.plot(total)
    plt.savefig(os.path.join(figure_path, 'total_loss.png'))
    plt.close()

    # End of training session
    print("End of episodes.\
        \nExiting game...")

    game.quit()
16  utils/hyperparams.py  (new file)
@@ -0,0 +1,16 @@
HPARAMS = {

    "horizon": 2048,
    "num_epochs": 15,
    "batch_size": 128,

    "policy_clip": 0.1,
    "discount_factor": 0.99,
    "GAE_lambda": 0.95,

    "entropy_coeff": 0.01,
    "value_coeff": 0.5,

    "learning_rate": 0.0003,

}
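Every key here except "value_coeff" is surfaced as a CLI flag in args.py above. If it were meant to be tunable from the command line as well, a matching flag (hypothetical, not part of this diff) would follow the same pattern:

# Hypothetical addition to parse_args() in args.py.
parser.add_argument('--value_coeff',
                    type=float,
                    default=HPARAMS["value_coeff"],
                    help="The value loss coefficient.")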
92  utils/metrics.py  (new file)
@@ -0,0 +1,92 @@
import os
import numpy as np
import matplotlib.pyplot as plt


def generate(parsed_args):

    # Setup parameter monitoring
    score_history = np.zeros(
        shape=(parsed_args.n_agents, parsed_args.n_episodes))

    best_score = np.zeros(parsed_args.n_agents)

    actor_loss = np.zeros(shape=(parsed_args.n_agents,
                                 parsed_args.n_episodes))

    critic_loss = np.zeros(shape=(parsed_args.n_agents,
                                  parsed_args.n_episodes))

    total_loss = np.zeros(shape=(parsed_args.n_agents,
                                 parsed_args.n_episodes))

    entropy = np.zeros(shape=(parsed_args.n_agents,
                              parsed_args.n_episodes))

    advantage = np.zeros(shape=(parsed_args.n_agents,
                                parsed_args.n_episodes))

    # Return all seven metric arrays as a single tuple.
    return (score_history, best_score, actor_loss,
            critic_loss, total_loss, entropy,
            advantage)


def plot_learning_curve(scores, num_players, figure_path):

    plt.figure()
    plt.title("Running Average - Score")
    plt.xlabel("Episode")
    plt.ylabel("Score")
    plt.legend([f"Agent {num}" for num in range(num_players)])
    for score in scores:
        running_avg = np.zeros(len(score))
        for i in range(len(score)):
            running_avg[i] = np.mean(score[max(0, i-100):(i+1)])
        plt.plot(running_avg)
    plt.savefig(os.path.join(figure_path, "avg_score.png"))
    plt.close()


def plot_score(scores, num_players, figure_path):

    plt.figure()
    plt.title("Agent Rewards - No Averaging")
    plt.xlabel("Episode")
    plt.ylabel("Score")
    plt.legend([f"Agent {num}" for num in range(num_players)])
    for player_score in scores:
        plt.plot(player_score)
    plt.savefig(os.path.join(figure_path, 'score.png'))
    plt.close()


def plot_loss(nn_type, losses, num_players, figure_path):

    plt.figure()
    plt.title(f"Running Average - {nn_type.capitalize()} Loss")
    plt.xlabel("Learning Iterations")
    plt.ylabel("Loss")
    plt.legend([f"Agent {num}" for num in range(num_players)])
    for loss in losses:
        running_avg = np.zeros(len(loss))
        for i in range(len(loss)):
            running_avg[i] = np.mean(loss[max(0, i-100):(i+1)])
        plt.plot(running_avg)
    plt.savefig(os.path.join(figure_path, f"{nn_type}_loss.png"))
    plt.close()


def plot_parameter(name, parameter, num_players, figure_path):

    plt.figure()
    plt.title(f"Running Average - {name.capitalize()}")
    plt.xlabel("Learning Iterations")
    plt.ylabel(f"{name.capitalize()}")
    plt.legend([f"Agent {num}" for num in range(num_players)])
    for param in parameter:
        running_avg = np.zeros(len(param))
        for i in range(len(param)):
            running_avg[i] = np.mean(param[max(0, i-100):(i+1)])
        plt.plot(running_avg)
    plt.savefig(os.path.join(figure_path, f"{name}.png"))
    plt.close()
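A minimal sketch of how these helpers fit together, mirroring the commented-out metrics.generate call in main.py above (parsed_args comes from args.parse_args() and figure_path from folder_struct.setup_dirs(); both are assumed to exist in the caller):

import utils.metrics as metrics

(score_history, best_score, actor_loss,
 critic_loss, total_loss, entropy, advantage) = metrics.generate(parsed_args)

# ... training fills the arrays, then:
metrics.plot_learning_curve(score_history, parsed_args.n_agents, figure_path)
metrics.plot_loss('actor', actor_loss, parsed_args.n_agents, figure_path)
metrics.plot_loss('critic', critic_loss, parsed_args.n_agents, figure_path)
metrics.plot_parameter('entropy', entropy, parsed_args.n_agents, figure_path)
metrics.plot_parameter('advantage', advantage, parsed_args.n_agents, figure_path)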
11  utils/seeds.py  (new file)
@@ -0,0 +1,11 @@
import random
import torch as T
import numpy as np


def set_seeds(value):

    random.seed(value)
    np.random.seed(value)
    T.manual_seed(value)
    T.cuda.manual_seed_all(value)
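set_seeds covers the Python, NumPy, and Torch (CPU and CUDA) generators but does not force deterministic CUDA kernels. If bit-exact GPU reproducibility were required, the usual extra PyTorch switches (standard library settings, not part of this diff) would be:

import torch as T

T.backends.cudnn.deterministic = True   # pick deterministic cuDNN kernels
T.backends.cudnn.benchmark = False      # disable kernel autotuning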