Update reward structure (fixed major bugs)

2023-11-23 12:44:23 +01:00 · 2023-11-23 12:44:23 +01:00 · 1a6ed25673
commit 1a6ed25673
parent ce4a90ac43
11 changed files with 106 additions and 58 deletions
--- a/assets/graphics/icon.png
+++ b/assets/graphics/icon.png
--- a/configs/game/monster_config.py
+++ b/configs/game/monster_config.py
@ -6,7 +6,7 @@ asset_path = os.path.join(
    script_dir, '../..', 'assets')

 monster_data = {
-    'squid': {'id': 1, 'health': 100, 'exp': 100, 'attack': 20, 'attack_type': 'slash', 'attack_sound': f'{asset_path}/audio/attack/slash.wav', 'speed': 3, 'knockback': 20, 'attack_radius': 80, 'notice_radius': 360},
-    'raccoon': {'id': 2, 'health': 300, 'exp': 250, 'attack': 40, 'attack_type': 'claw',  'attack_sound': f'{asset_path}/audio/attack/claw.wav', 'speed': 2, 'knockback': 20, 'attack_radius': 120, 'notice_radius': 400},
-    'spirit': {'id': 3, 'health': 100, 'exp': 110, 'attack': 8, 'attack_type': 'thunder', 'attack_sound': f'{asset_path}/audio/attack/fireball.wav', 'speed': 4, 'knockback': 20, 'attack_radius': 60, 'notice_radius': 350},
-    'bamboo': {'id': 4, 'health': 70, 'exp': 120, 'attack': 6, 'attack_type': 'leaf_attack', 'attack_sound': f'{asset_path}/audio/attack/slash.wav', 'speed': 3, 'knockback': 20, 'attack_radius': 50, 'notice_radius': 300}}
+    'squid': {'id': 1, 'health': .1, 'exp': 1, 'attack': .5, 'attack_type': 'slash', 'speed': 3, 'knockback': 20, 'attack_radius': 80, 'notice_radius': 360},
+    'raccoon': {'id': 2, 'health': .3, 'exp': 2.5, 'attack': .8, 'attack_type': 'claw',  'speed': 2, 'knockback': 20, 'attack_radius': 120, 'notice_radius': 400},
+    'spirit': {'id': 3, 'health': .1, 'exp': 1.1, 'attack': .6, 'attack_type': 'thunder', 'speed': 4, 'knockback': 20, 'attack_radius': 60, 'notice_radius': 350},
+    'bamboo': {'id': 4, 'health': .07, 'exp': 1.2, 'attack': .4, 'attack_type': 'leaf_attack', 'speed': 3, 'knockback': 20, 'attack_radius': 50, 'notice_radius': 300}}
--- a/configs/game/player_config.py
+++ b/configs/game/player_config.py
@ -1,9 +1,9 @@
 tank_stats = {
    'role_id': 1,
-    'health': 150,
-    'energy': 40,
-    'attack': 7,
-    'magic': 3,
+    'health': 1.5,
+    'energy': .4,
+    'attack': .7,
+    'magic': .3,
    'speed': 3
 }

--- a/configs/game/spell_config.py
+++ b/configs/game/spell_config.py
@ -5,5 +5,5 @@ asset_path = os.path.join(
    script_dir, '../..', 'assets')

 magic_data = {
-    'flame': {'strength': 5, 'cost': 20, 'graphic': f"{asset_path}/graphics/particles/flame/fire.png"},
-    'heal': {'strength': 20, 'cost': 10, 'graphic': f"{asset_path}/graphics/particles/heal/heal.png"}}
+    'flame': {'strength': 5, 'cost': .020, 'graphic': f"{asset_path}/graphics/particles/flame/fire.png"},
+    'heal': {'strength': 20, 'cost': .010, 'graphic': f"{asset_path}/graphics/particles/heal/heal.png"}}
--- a/entities/components/stats.py
+++ b/entities/components/stats.py
@ -42,6 +42,12 @@ class StatsHandler:
        else:
            self.energy = self.stats['energy']

+    def health_recovery(self):
+        if self.energy < self.stats['health']:
+            self.energy += 0.15
+        else:
+            self.energy = self.stats['energy']
+
    def get_value_by_index(self, index):
        return list(self.stats.values())[index]

--- a/entities/player.py
+++ b/entities/player.py
@ -16,7 +16,16 @@ from agents.ppo.agent import Agent

 class Player(pygame.sprite.Sprite):

-    def __init__(self, position, groups, obstacle_sprites, visible_sprites, attack_sprites, attackable_sprites, role, player_id):
+    def __init__(self,
+                 position,
+                 groups,
+                 obstacle_sprites,
+                 visible_sprites,
+                 attack_sprites,
+                 attackable_sprites,
+                 role,
+                 player_id):
+
        super().__init__(groups)

        # Setup Sprites
@ -79,7 +88,9 @@ class Player(pygame.sprite.Sprite):
                            offset = pygame.math.Vector2(0, 75)
                            for leaf in range(randint(3, 6)):
                                self.animation_player.create_grass_particles(
-                                    position=pos - offset, groups=[self.visible_sprites])
+                                    position=pos - offset,
+                                    groups=[self.visible_sprites])
+
                            target_sprite.kill()
                        else:
                            target_sprite.get_damaged(
@ -109,13 +120,12 @@ class Player(pygame.sprite.Sprite):

        self.reward_features = [
            self.stats.exp,
-            # nearest_dist,
-            -nearest_enemy.stats.health,
-            self.stats.health
+            np.exp(-nearest_dist**2),
+            np.exp(-nearest_enemy.stats.health**2),
+            -np.exp(-self.stats.health)
        ]

        self.state_features = [
-            # TODO: Find a way to not use magic numbers
            np.exp(-self.rect.center[0]),
            np.exp(-self.rect.center[1]),
            self._input.movement.direction.x,
@ -127,7 +137,6 @@ class Player(pygame.sprite.Sprite):
        enemy_states = []

        for distance, direction, enemy in sorted_distances[:5]:
-            # TODO: Find a way to not use magic numbers
            enemy_states.extend([
                np.exp(-distance),
                direction[0],
@ -156,12 +165,13 @@ class Player(pygame.sprite.Sprite):
            n_epochs=4)
        try:
            self.agent.load_models()
-        except FileNotFoundError as e:
-            print(f"{e}. Skipping loading...")
+        except FileNotFoundError:
+            print("FileNotFoundError for agent.load_model().\
+                Skipping loading...")

    def is_dead(self):
-        if self.stats.health == 0:
-            self.stats.exp = -100
+        if self.stats.health <= 0:
+            self.stats.exp = max(0, self.stats.exp - .5)
            return True
        else:
            return False
@ -182,11 +192,9 @@ class Player(pygame.sprite.Sprite):
                                self.animation.rect,
                                self)

-        self.done = self.is_dead()
-
        self.score = self.stats.exp
        self.agent.remember(self.state_features, action,
-                            probs, value, self.stats.exp, self.done)
+                            probs, value, self.stats.exp, self.is_dead())

        if self.n_steps % self.N == 0:
            self.agent.learn()
@ -194,7 +202,7 @@ class Player(pygame.sprite.Sprite):

        self.get_current_state()

-        if self.done:
+        if self.is_dead():
            self.agent.learn()

        # Refresh objects based on input
@ -207,5 +215,6 @@ class Player(pygame.sprite.Sprite):
        self.rect = self.animation.rect

        # Cooldowns and Regen
+        self.stats.health_recovery()
        self.stats.energy_recovery()
        self._input.cooldowns(self._input.combat.vulnerable)
--- a/game.py
+++ b/game.py
@ -13,17 +13,26 @@ class Game:

        self.screen = pygame.display.set_mode(
            (WIDTH, HEIGHT))
+
        pygame.display.set_caption('Pneuma')
+
+        img = pygame.image.load('assets/graphics/icon.png')
+        pygame.display.set_icon(img)
        self.clock = pygame.time.Clock()

        self.level = Level()

+    def calc_score(self):
+        self.scores = [0 for _ in range(len(self.level.player_sprites))]
+
+        for player in self.level.player_sprites:
+            self.scores[player.player_id] = player.stats.exp
+
    def run(self):

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
-                pygame.quit()
-                sys.exit()
+                self.quit()
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_m:
                    self.level.toggle_menu()
@ -34,3 +43,7 @@ class Game:

        pygame.display.update()
        self.clock.tick(FPS)
+
+    def quit(self):
+        pygame.quit()
+        sys.exit()
--- a/level/level.py
+++ b/level/level.py
@ -1,5 +1,6 @@
 import os
 import pygame
+import numpy as np

 from random import choice

@ -46,6 +47,7 @@ class Level:
        else:
            for player in self.player_sprites:
                player.get_max_num_states()
+        self.dead_players = np.zeros(len(self.player_sprites))

        # UI setup
        self.ui = UI()
@ -174,11 +176,13 @@ class Level:
            self.visible_sprites.custom_draw(self.player)
            self.ui.display(self.player)

-        debug('v0.6')
+        debug('v0.8')

        for player in self.player_sprites:
            if player.is_dead():
-                self.done = True
+                self.dead_players[player.player_id] = True
+
+        self.done = True if self.dead_players.all() == 1 else False

        if not self.game_paused:
            # Update the game
--- a/main.py
+++ b/main.py
@ -1,3 +1,6 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
 from game import Game
 from tqdm import tqdm

@ -6,54 +9,67 @@ environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1'


 if __name__ == '__main__':
-    n_episodes = 3000
+
+    n_episodes = 1000
+    game_len = 10000

    figure_file = 'plots/score.png'
-    score_history = []
    best_score = 0
    avg_score = 0

-    agent_list = []
-
-    game_len = 5000
-
    game = Game()

+    agent_list = []
+    exp_points_list = []
+    score_history = np.zeros(
+        shape=(len(game.level.player_sprites), n_episodes, ))
+    best_score = np.zeros(len(game.level.player_sprites))
+    avg_score = np.zeros(len(game.level.player_sprites))
    for i in tqdm(range(n_episodes)):
        # TODO: Make game.level.reset_map() so we don't __init__ everything all the time (such a waste)
        if i != 0:
            game.level.__init__(reset=True)
        # TODO: Make game.level.reset_map() so we don't pull out and load the agent every time (There is -definitevly- a better way)
            for player in game.level.player_sprites:
-            for player_id, agent in agent_list:
-                if player.player_id == player_id:
-                    player.agent = agent
+                for agent in agent_list:
+                    player.agent = agent_list[player.player_id]
+                    player.stats.exp = score_history[player.player_id][i-1]

        agent_list = []
-        done = False
-        score = 0
-        for _ in tqdm(range(game_len)):
-            if not game.level.done:
-                game.run()
-            else:
-                break
-        for player in game.level.player_sprites:
-            agent_list.append((player.player_id, player.agent))

-        if i == n_episodes-1 and game.level.enemy_sprites != []:
+        for j in range(game_len):
+            if not game.level.done:
+
+                game.run()
+                game.calc_score()
+
+                if (j == game_len-1 or game.level.done) and game.level.enemy_sprites != []:
                    for player in game.level.player_sprites:
                        for enemy in game.level.enemy_sprites:
-                    player.stats.exp -= 5
-                player.update()
+                            player.stats.exp *= .95
+            else:
+                break

        for player in game.level.player_sprites:
+            agent_list.append(player.agent)
+            exp_points = player.stats.exp
+            score_history[player.player_id][i] = exp_points
+            avg_score[player.player_id] = np.mean(
+                score_history[player.player_id])
+            if avg_score[player.player_id] >= best_score[player.player_id]:
                player.agent.save_models()
+                best_score[player.player_id] = avg_score[player.player_id]

-        # TODO: Make it so that scores appear here for each player
-        # score_history.append(game.level.player.score)
-        # print(score)
-        # avg_score = np.mean(score_history[-100:])
+            print(
+                f"\nCumulative score for player {player.player_id}:\
+                    {score_history[0][i]}\
+                    \nAverage score for player {player.player_id}:\
+                    {avg_score[player.player_id]}\
+                    \nBest score for player {player.player_id}:\
+                    {best_score[player.player_id]}")

-        # if avg_score > best_score:
-        #     best_score = avg_score
-        #     game.level.player.agent.save_models()
+    plt.plot(score_history[0])
+
+    game.quit()
+
+    plt.show()
--- a/tmp/ppo/actor_torch_ppo
+++ b/tmp/ppo/actor_torch_ppo
--- a/tmp/ppo/critic_torch_ppo
+++ b/tmp/ppo/critic_torch_ppo