Updated architecture and default hyperparams

2024-01-21 21:10:27 +01:00 · 2024-01-21 21:10:27 +01:00 · 8d3f4506ba
commit 8d3f4506ba
parent 4394cc7452
22 changed files with 300 additions and 54 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/agents/.DS_Store
+++ b/agents/.DS_Store
--- a/agents/ppo/brain.py
+++ b/agents/ppo/brain.py
@@ -52,7 +52,7 @@ class PPOMemory:

 class ActorNetwork(nn.Module):

-    def __init__(self, input_dim, output_dim, alpha, fc1_dims=256, fc2_dims=256, chkpt_dir='tmp/ppo'):
+    def __init__(self, input_dim, output_dim, alpha, fc1_dims=512, fc2_dims=512, chkpt_dir='tmp/ppo'):
        super(ActorNetwork, self).__init__()

        self.chkpt_dir = chkpt_dir
@@ -89,7 +89,7 @@ class ActorNetwork(nn.Module):

 class CriticNetwork(nn.Module):

-    def __init__(self, input_dims, alpha, fc1_dims=256, fc2_dims=256, chkpt_dir='tmp/ppo'):
+    def __init__(self, input_dims, alpha, fc1_dims=512, fc2_dims=512, chkpt_dir='tmp/ppo'):
        super(CriticNetwork, self).__init__()

        self.chkpt_dir = chkpt_dir
--- a/agents/saved_models/A0
+++ b/agents/saved_models/A0
--- a/agents/saved_models/C0
+++ b/agents/saved_models/C0
--- a/configs/game/monster_config.py
+++ b/configs/game/monster_config.py
@@ -2,40 +2,40 @@ monster_data = {
    'squid': {'id': 1,
              'health': 1,
              'exp': 10,
-              'attack': 50,
+              'attack': 0,
              'attack_type': 'slash',
              'speed': 3,
              'knockback': 20,
-              'attack_radius': 80,
-              'notice_radius': 360},
+              'attack_radius': 1,
+              'notice_radius': 1},

    'raccoon': {'id': 2,
                'health': 1,
                'exp': 25,
-                'attack': 80,
+                'attack': 0,
                'attack_type': 'claw',
                'speed': 2,
                'knockback': 10,
-                'attack_radius': 120,
-                'notice_radius': 400},
+                'attack_radius': 1,
+                'notice_radius': 1},

    'spirit': {'id': 3,
               'health': 1,
               'exp': 11,
-               'attack': 60,
+               'attack': 0,
               'attack_type': 'thunder',
               'speed': 4,
               'knockback': 20,
-               'attack_radius': 60,
-               'notice_radius': 350},
+               'attack_radius': 1,
+               'notice_radius': 1},

    'bamboo': {'id': 4,
               'health': 1,
               'exp': 9,
-               'attack': 20,
+               'attack': 0,
               'attack_type': 'leaf_attack',
               'speed': 3,
               'knockback': 20,
-               'attack_radius': 50,
-               'notice_radius': 300}
+               'attack_radius': 1,
+               'notice_radius': 1}
 }
--- a/entities/player.py
+++ b/entities/player.py
@@ -146,26 +146,6 @@ class Player(pygame.sprite.Sprite):
        spell_damage = magic_data[self._input.combat.magic]['strength']
        return (base_damage + spell_damage)

-    def get_reward(self):
-
-        self.reward = 0
-
-        # Base reward on player exp
-        self.reward += self.stats.exp
-        print(f'Player exp added to reward: {self.stats.exp} -> {self.reward}')
-
-        # Add relative hp of player
-        self.reward += self.stats.health/self.stats.stats['health']
-        print(f"Player hp added to reward: {self.stats.health/self.stats.stats['health']} -> {self.reward}")
-
-        # Take into account distance of nearest enemy from player relative to the map length
-        self.reward -= self.nearest_dist/np.sqrt(np.sum(self.map_edge))
-        print(f'Relative distance of enemy: {self.nearest_dist/np.sqrt(np.sum(self.map_edge))} -> {self.reward}')
-
-        # Take into account nearest enemy relative health
-        self.reward -= self.nearest_enemy.stats.health/self.nearest_enemy.stats.monster_info['health']
-        print(f"Enemy hp added: {self.nearest_enemy.stats.health/self.nearest_enemy.stats.monster_info['health']} -> {self.reward}")
-
    def get_current_state(self):

        if self.distance_direction_from_enemy != []:
@@ -174,11 +154,15 @@ class Player(pygame.sprite.Sprite):
        else:
            sorted_distances = np.zeros(self.num_features)

-        self.nearest_dist, _, self.nearest_enemy = sorted_distances[0]
+        nearest_dist, nearest_en_dir, nearest_enemy = sorted_distances[0]

        self.action_features = [self._input.action]
+        if hasattr(self, 'state_features'):
+            self.old_state_features = self.state_features

-        self.get_reward()
+            self.reward = self.stats.exp\
+                + self.stats.health/self.stats.stats['health'] - 1\
+                - nearest_dist/np.sqrt(np.sum(self.map_edge))

        self.state_features = [
            self.animation.rect.center[0]/self.map_edge[0],
@@ -187,25 +171,30 @@ class Player(pygame.sprite.Sprite):
            self._input.movement.direction.y,
            self.stats.health/self.stats.stats['health'],
            self.stats.energy/self.stats.stats['energy'],
-            1 if 'attack' in self._input.status else 0,
-
        ]

-        for distance, direction, enemy in self.distance_direction_from_enemy:
        self.state_features.extend([
-
-                distance/np.sqrt(np.sum(self.map_edge)),
-
-                direction[0],
-
-                direction[1],
-
-                enemy.stats.health /
-                enemy.stats.monster_info['health'],
-
-                enemy.stats.exp,
+            nearest_dist/np.sqrt(np.sum(self.map_edge)),
+            nearest_en_dir[0],
+            nearest_en_dir[1],
+            nearest_enemy.stats.exp
        ])

+        # for distance, direction, enemy in self.distance_direction_from_enemy:
+        #     self.state_features.extend([
+        #
+        #         distance/np.sqrt(np.sum(self.map_edge)),
+        #
+        #         direction[0],
+        #
+        #         direction[1],
+        #
+        #         enemy.stats.health /
+        #         enemy.stats.monster_info['health'],
+        #
+        #         enemy.stats.exp,
+        #     ])
+
        if hasattr(self, 'num_features'):
            while len(self.state_features) < self.num_features:
                self.state_features.append(0)
--- a/entities/player.sync-conflict-20240111-224358-MEQXTXG.py
+++ b/entities/player.sync-conflict-20240111-224358-MEQXTXG.py
@@ -0,0 +1,257 @@
+import pygame
+import numpy as np
+from random import randint
+
+from configs.game.weapon_config import weapon_data
+from configs.game.spell_config import magic_data
+
+from .components.stats import StatsHandler
+from .components._input import InputHandler
+from .components.animaton import AnimationHandler
+
+from effects.particle_effects import AnimationPlayer
+
+from agents.ppo.agent import Agent
+
+
+class Player(pygame.sprite.Sprite):
+    """Agent-controlled player sprite.
+
+    Combines animation, input and stats handlers with a PPO ``Agent`` that
+    chooses one action on every ``update`` call.
+    """
+
+    # Reward tracing prints on every frame, so it is opt-in.
+    debug_rewards = False
+
+    def __init__(self,
+                 player_id,
+                 role,
+                 position,
+                 map_edge,
+                 groups,
+                 obstacle_sprites,
+                 visible_sprites,
+                 attack_sprites,
+                 attackable_sprites
+                 ):
+        """Store the sprite groups and build the component handlers.
+
+        The agent itself is created later by ``setup_agent``.
+        """
+        super().__init__(groups)
+
+        self.initial_position = position
+        self.map_edge = map_edge
+        self.player_id = player_id
+        # Iterable of (distance, direction, enemy) entries; never filled in
+        # by this class -- presumably assigned by the caller every frame.
+        self.distance_direction_from_enemy = None
+        # Safe defaults so get_reward() works before the first state refresh.
+        self.nearest_dist = 0
+        self.nearest_enemy = None
+
+        # Sprite Setup
+        self.sprite_type = "player"
+        self.obstacle_sprites = obstacle_sprites
+        self.visible_sprites = visible_sprites
+        self.attack_sprites = attack_sprites
+        self.attackable_sprites = attackable_sprites
+
+        # Graphics Setup
+        self.animation_player = AnimationPlayer()
+        self.animation = AnimationHandler(self.sprite_type)
+        self.animation.import_assets(position)
+
+        # Input Setup
+        self._input = InputHandler(
+            self.sprite_type, self.animation_player)
+
+        # Setup Stats
+        self.role = role
+        self.stats = StatsHandler(self.sprite_type, self.role)
+
+    def setup_agent(self,
+                    gamma,
+                    alpha,
+                    policy_clip,
+                    batch_size,
+                    n_epochs,
+                    gae_lambda,
+                    chkpt_dir,
+                    entropy_coef,
+                    no_load=False):
+        """Create the PPO agent and, unless ``no_load``, restore checkpoints.
+
+        The network input size is the length of the state built here, so
+        enemy data should already be assigned when this is called.
+        """
+        self.max_num_enemies = len(self._enemy_entries())
+        self.get_current_state()
+        self.num_features = len(self.state_features)
+
+        self.agent = Agent(
+            input_dims=self.num_features,
+            n_actions=len(self._input.possible_actions),
+            gamma=gamma,
+            alpha=alpha,
+            policy_clip=policy_clip,
+            batch_size=batch_size,
+            n_epochs=n_epochs,
+            gae_lambda=gae_lambda,
+            entropy_coef=entropy_coef,
+            chkpt_dir=chkpt_dir
+        )
+        print(
+            f"\nAgent initialized on player {self.player_id} using {self.agent.actor.device}.")
+
+        if no_load:
+            return
+
+        print("Attempting to load models ...")
+        try:
+            self.agent.load_models(
+                actr_chkpt=f"A{self.player_id}",
+                crtc_chkpt=f"C{self.player_id}"
+            )
+            print("Models loaded ...\n")
+        except FileNotFoundError:
+            # A missing checkpoint is not fatal; the freshly built agent
+            # is kept as it is.
+            print(f"FileNotFound for player {self.player_id}."
+                  "\nSkipping loading ...\n")
+
+    def _enemy_entries(self):
+        """Return the enemy entries, or an empty list if none are set."""
+        entries = self.distance_direction_from_enemy
+        return [] if entries is None else entries
+
+    def _distance_scale(self):
+        """Return the divisor used to normalise enemy distances."""
+        # NOTE(review): sqrt(w + h) is not the map diagonal, which would be
+        # sqrt(w**2 + h**2); left unchanged to keep feature scaling as is.
+        return np.sqrt(np.sum(self.map_edge))
+
+    def get_status(self):
+        """Keep the ``_idle``/``_attack`` suffix of the input status in
+        step with the movement vector and the attacking flag."""
+        if self._input.movement.direction.x == 0\
+                and self._input.movement.direction.y == 0:
+
+            if 'idle' not in self._input.status and 'attack' not in self._input.status:
+                self._input.status += '_idle'
+
+        if self._input.attacking:
+            # Movement is zeroed for as long as the attack flag is set.
+            self._input.movement.direction.x = 0
+            self._input.movement.direction.y = 0
+            if 'attack' not in self._input.status:
+                if 'idle' in self._input.status:
+                    self._input.status = self._input.status.replace(
+                        'idle', 'attack')
+                else:
+                    self._input.status += '_attack'
+        elif 'attack' in self._input.status:
+            self._input.status = self._input.status.replace('_attack', '')
+
+    def attack_logic(self):
+        """Resolve collisions between attack sprites and attackable sprites.
+
+        Grass is removed with a burst of leaf particles; every other target
+        receives ``get_damaged``.
+        """
+        for attack_sprite in self.attack_sprites or ():
+            collision_sprites = pygame.sprite.spritecollide(
+                attack_sprite, self.attackable_sprites, False)
+            for target_sprite in collision_sprites:
+                if target_sprite.sprite_type != 'grass':
+                    target_sprite.get_damaged(
+                        self, attack_sprite.sprite_type)
+                    continue
+
+                pos = target_sprite.rect.center
+                offset = pygame.math.Vector2(0, 75)
+                for _ in range(randint(3, 6)):
+                    self.animation_player.create_grass_particles(
+                        position=pos - offset,
+                        groups=[self.visible_sprites])
+                target_sprite.kill()
+
+    def get_full_weapon_damage(self):
+        """Return base attack plus the equipped weapon's damage."""
+        weapon_damage = weapon_data[self._input.combat.weapon]['damage']
+        return self.stats.attack + weapon_damage
+
+    def get_full_magic_damage(self):
+        """Return base magic plus the selected spell's strength."""
+        spell_damage = magic_data[self._input.combat.magic]['strength']
+        return self.stats.magic + spell_damage
+
+    def get_reward(self):
+        """Recompute ``self.reward`` from exp, health and the nearest enemy.
+
+        The two enemy terms are skipped when there is no nearest enemy.
+        """
+        trace = print if self.debug_rewards else (lambda *_: None)
+
+        # Base reward on player exp
+        self.reward = self.stats.exp
+        trace(f'Player exp added to reward: {self.stats.exp} -> {self.reward}')
+
+        # Add relative hp of player
+        own_hp = self.stats.health/self.stats.stats['health']
+        self.reward += own_hp
+        trace(f"Player hp added to reward: {own_hp} -> {self.reward}")
+
+        if self.nearest_enemy is None:
+            return
+
+        # Take into account distance of nearest enemy from player
+        rel_dist = self.nearest_dist/self._distance_scale()
+        self.reward -= rel_dist
+        trace(f'Relative distance of enemy: {rel_dist} -> {self.reward}')
+
+        # Take into account nearest enemy relative health
+        enemy = self.nearest_enemy.stats
+        enemy_hp = enemy.health/enemy.monster_info['health']
+        self.reward -= enemy_hp
+        trace(f"Enemy hp added: {enemy_hp} -> {self.reward}")
+
+    def get_current_state(self):
+        """Refresh nearest-enemy data, the reward and ``state_features``.
+
+        ``state_features`` ends up as a NumPy array: seven player values,
+        then five values per enemy, zero-padded up to ``num_features``
+        once ``setup_agent`` has set that attribute.
+        """
+        enemies = self._enemy_entries()
+        if len(enemies) > 0:
+            # min() yields the first smallest entry, same as sorted(...)[0]
+            self.nearest_dist, _, self.nearest_enemy = min(
+                enemies, key=lambda entry: entry[0])
+        else:
+            # The old np.zeros fallback could not be unpacked into three
+            # names (and needed num_features before it existed).
+            self.nearest_dist, self.nearest_enemy = 0, None
+
+        self.action_features = [self._input.action]
+
+        self.get_reward()
+
+        scale = self._distance_scale()
+        features = [
+            self.animation.rect.center[0]/self.map_edge[0],
+            self.animation.rect.center[1]/self.map_edge[1],
+            self._input.movement.direction.x,
+            self._input.movement.direction.y,
+            self.stats.health/self.stats.stats['health'],
+            self.stats.energy/self.stats.stats['energy'],
+            1 if 'attack' in self._input.status else 0,
+        ]
+
+        for distance, direction, enemy in enemies:
+            features.extend([
+                distance/scale,
+                direction[0],
+                direction[1],
+                enemy.stats.health/enemy.stats.monster_info['health'],
+                enemy.stats.exp,
+            ])
+
+        if hasattr(self, 'num_features'):
+            # Pad with zeros so the feature vector keeps a fixed length
+            features.extend([0] * (self.num_features - len(features)))
+
+        self.state_features = np.array(features)
+
+    def is_dead(self):
+        """Return True if health is depleted, clamping it to zero."""
+        if self.stats.health <= 0:
+            self.stats.health = 0
+            # NOTE(review): (3264, 448) is a hard-coded position -- confirm
+            # it is the intended spot for a dead player.
+            self.animation.import_assets((3264, 448))
+            return True
+        return False
+
+    def agent_update(self):
+        """Run one observe -> act -> remember step of the agent."""
+        # Get the current state
+        self.get_current_state()
+
+        # Choose action based on current state
+        action, probs, value\
+            = self.agent.choose_action(self.state_features)
+
+        # Apply chosen action
+        self._input.check_input(action,
+                                self.stats.speed,
+                                self.animation.hitbox,
+                                self.obstacle_sprites,
+                                self.animation.rect,
+                                self)
+
+        # NOTE(review): the stored reward was computed before the action
+        # was applied -- confirm this one-step offset is intended.
+        self.agent.remember(self.state_features, action,
+                            probs, value, self.reward, self.is_dead())
+
+        self.get_current_state()
+
+    def update(self):
+        """Per-frame tick: act, regenerate, then animate."""
+        self.agent_update()
+
+        # Cooldowns and Regen
+        self.stats.health_recovery()
+        self.stats.energy_recovery()
+
+        # Refresh player based on input and animate
+        self.get_status()
+        self.animation.animate(
+            self._input.status, self._input.combat.vulnerable)
+        self._input.cooldowns(self._input.combat.vulnerable)
--- a/figures/.DS_Store
+++ b/figures/.DS_Store
--- a/figures/actor_loss.png
+++ b/figures/actor_loss.png
--- a/figures/actor_loss_1en.png
+++ b/figures/actor_loss_1en.png
--- a/figures/actor_loss_1en2.png
+++ b/figures/actor_loss_1en2.png
--- a/figures/critic_loss.png
+++ b/figures/critic_loss.png
--- a/figures/critic_loss_1en.png
+++ b/figures/critic_loss_1en.png
--- a/figures/critic_loss_1en2.png
+++ b/figures/critic_loss_1en2.png
--- a/figures/score.png
+++ b/figures/score.png
--- a/figures/score_1en.png
+++ b/figures/score_1en.png
--- a/figures/score_1en2.png
+++ b/figures/score_1en2.png
--- a/figures/total_loss.png
+++ b/figures/total_loss.png
--- a/figures/total_loss_1en.png
+++ b/figures/total_loss_1en.png
--- a/figures/total_loss_1en2.png
+++ b/figures/total_loss_1en2.png
--- a/pneuma.py
+++ b/pneuma.py
@@ -86,17 +86,17 @@ if __name__ == "__main__":

    parser.add_argument('--policy_clip',
                        type=float,
-                        default=0.2,
+                        default=0.1,
                        help="The policy clip")

    parser.add_argument('--batch_size',
                        type=int,
-                        default=64,
+                        default=128,
                        help="The size of each batch")

    parser.add_argument('--n_epochs',
                        type=int,
-                        default=10,
+                        default=20,
                        help="The number of epochs")

    parser.add_argument('--gae_lambda',