Update reward structure (fixed major bugs)

2023-11-23 12:44:23 +01:00 · 2023-11-23 12:44:23 +01:00 · 1a6ed25673
commit 1a6ed25673
parent ce4a90ac43
11 changed files with 106 additions and 58 deletions
--- a/assets/graphics/icon.png
+++ b/assets/graphics/icon.png
--- a/configs/game/monster_config.py
+++ b/configs/game/monster_config.py
@ -6,7 +6,7 @@ asset_path = os.path.join(
    script_dir, '../..', 'assets')
 monster_data = {
-    'squid': {'id': 1, 'health': 100, 'exp': 100, 'attack': 20, 'attack_type': 'slash', 'attack_sound': f'{asset_path}/audio/attack/slash.wav', 'speed': 3, 'knockback': 20, 'attack_radius': 80, 'notice_radius': 360},
+    'squid': {'id': 1, 'health': .1, 'exp': 1, 'attack': .5, 'attack_type': 'slash', 'speed': 3, 'knockback': 20, 'attack_radius': 80, 'notice_radius': 360},
-    'raccoon': {'id': 2, 'health': 300, 'exp': 250, 'attack': 40, 'attack_type': 'claw',  'attack_sound': f'{asset_path}/audio/attack/claw.wav', 'speed': 2, 'knockback': 20, 'attack_radius': 120, 'notice_radius': 400},
+    'raccoon': {'id': 2, 'health': .3, 'exp': 2.5, 'attack': .8, 'attack_type': 'claw',  'speed': 2, 'knockback': 20, 'attack_radius': 120, 'notice_radius': 400},
-    'spirit': {'id': 3, 'health': 100, 'exp': 110, 'attack': 8, 'attack_type': 'thunder', 'attack_sound': f'{asset_path}/audio/attack/fireball.wav', 'speed': 4, 'knockback': 20, 'attack_radius': 60, 'notice_radius': 350},
+    'spirit': {'id': 3, 'health': .1, 'exp': 1.1, 'attack': .6, 'attack_type': 'thunder', 'speed': 4, 'knockback': 20, 'attack_radius': 60, 'notice_radius': 350},
-    'bamboo': {'id': 4, 'health': 70, 'exp': 120, 'attack': 6, 'attack_type': 'leaf_attack', 'attack_sound': f'{asset_path}/audio/attack/slash.wav', 'speed': 3, 'knockback': 20, 'attack_radius': 50, 'notice_radius': 300}}
+    'bamboo': {'id': 4, 'health': .07, 'exp': 1.2, 'attack': .4, 'attack_type': 'leaf_attack', 'speed': 3, 'knockback': 20, 'attack_radius': 50, 'notice_radius': 300}}
--- a/configs/game/player_config.py
+++ b/configs/game/player_config.py
@ -1,9 +1,9 @@
 tank_stats = {
    'role_id': 1,
-    'health': 150,
+    'health': 1.5,
-    'energy': 40,
+    'energy': .4,
-    'attack': 7,
+    'attack': .7,
-    'magic': 3,
+    'magic': .3,
    'speed': 3
 }
--- a/configs/game/spell_config.py
+++ b/configs/game/spell_config.py
@ -5,5 +5,5 @@ asset_path = os.path.join(
    script_dir, '../..', 'assets')
 magic_data = {
-    'flame': {'strength': 5, 'cost': 20, 'graphic': f"{asset_path}/graphics/particles/flame/fire.png"},
+    'flame': {'strength': 5, 'cost': .020, 'graphic': f"{asset_path}/graphics/particles/flame/fire.png"},
-    'heal': {'strength': 20, 'cost': 10, 'graphic': f"{asset_path}/graphics/particles/heal/heal.png"}}
+    'heal': {'strength': 20, 'cost': .010, 'graphic': f"{asset_path}/graphics/particles/heal/heal.png"}}
--- a/entities/components/stats.py
+++ b/entities/components/stats.py
@ -42,6 +42,12 @@ class StatsHandler:
        else:
            self.energy = self.stats['energy']
    def health_recovery(self):
        """Regenerate a small amount of health, capped at the maximum.

        Adds 0.15 to ``self.health`` per call and clamps the result to
        ``self.stats['health']``. Energy is left untouched; the previous
        version read and wrote ``self.energy`` here, so health never
        recovered and energy was modified instead.
        """
        # min() keeps health from overshooting the cap on the last tick.
        self.health = min(self.health + 0.15, self.stats['health'])
    def get_value_by_index(self, index):
        """Return the value stored at position ``index`` of ``self.stats``.

        Positions follow the iteration order of ``self.stats.values()``.
        """
        stat_values = list(self.stats.values())
        return stat_values[index]
--- a/entities/player.py
+++ b/entities/player.py
@ -16,7 +16,16 @@ from agents.ppo.agent import Agent
 class Player(pygame.sprite.Sprite):
-    def __init__(self, position, groups, obstacle_sprites, visible_sprites, attack_sprites, attackable_sprites, role, player_id):
+    def __init__(self,
                 position,
                 groups,
                 obstacle_sprites,
                 visible_sprites,
                 attack_sprites,
                 attackable_sprites,
                 role,
                 player_id):
        super().__init__(groups)
        # Setup Sprites
@ -79,7 +88,9 @@ class Player(pygame.sprite.Sprite):
                            offset = pygame.math.Vector2(0, 75)
                            for leaf in range(randint(3, 6)):
                                self.animation_player.create_grass_particles(
-                                    position=pos - offset, groups=[self.visible_sprites])
+                                    position=pos - offset,
                                    groups=[self.visible_sprites])
                            target_sprite.kill()
                        else:
                            target_sprite.get_damaged(
@ -109,13 +120,12 @@ class Player(pygame.sprite.Sprite):
        self.reward_features = [
            self.stats.exp,
-            # nearest_dist,
+            np.exp(-nearest_dist**2),
-            -nearest_enemy.stats.health,
+            np.exp(-nearest_enemy.stats.health**2),
-            self.stats.health
+            -np.exp(-self.stats.health)
        ]
        self.state_features = [
            # TODO: Find a way to not use magic numbers
            np.exp(-self.rect.center[0]),
            np.exp(-self.rect.center[1]),
            self._input.movement.direction.x,
@ -127,7 +137,6 @@ class Player(pygame.sprite.Sprite):
        enemy_states = []
        for distance, direction, enemy in sorted_distances[:5]:
            # TODO: Find a way to not use magic numbers
            enemy_states.extend([
                np.exp(-distance),
                direction[0],
@ -156,12 +165,13 @@ class Player(pygame.sprite.Sprite):
            n_epochs=4)
        try:
            self.agent.load_models()
-        except FileNotFoundError as e:
+        except FileNotFoundError:
-            print(f"{e}. Skipping loading...")
+            print("FileNotFoundError for agent.load_model().\
                Skipping loading...")
    def is_dead(self):
-        if self.stats.health == 0:
+        if self.stats.health <= 0:
-            self.stats.exp = -100
+            self.stats.exp = max(0, self.stats.exp - .5)
            return True
        else:
            return False
@ -182,11 +192,9 @@ class Player(pygame.sprite.Sprite):
                                self.animation.rect,
                                self)
        self.done = self.is_dead()
        self.score = self.stats.exp
        self.agent.remember(self.state_features, action,
-                            probs, value, self.stats.exp, self.done)
+                            probs, value, self.stats.exp, self.is_dead())
        if self.n_steps % self.N == 0:
            self.agent.learn()
@ -194,7 +202,7 @@ class Player(pygame.sprite.Sprite):
        self.get_current_state()
-        if self.done:
+        if self.is_dead():
            self.agent.learn()
        # Refresh objects based on input
@ -207,5 +215,6 @@ class Player(pygame.sprite.Sprite):
        self.rect = self.animation.rect
        # Cooldowns and Regen
        self.stats.health_recovery()
        self.stats.energy_recovery()
        self._input.cooldowns(self._input.combat.vulnerable)
--- a/game.py
+++ b/game.py
@ -13,17 +13,26 @@ class Game:
        self.screen = pygame.display.set_mode(
            (WIDTH, HEIGHT))
        pygame.display.set_caption('Pneuma')
        img = pygame.image.load('assets/graphics/icon.png')
        pygame.display.set_icon(img)
        self.clock = pygame.time.Clock()
        self.level = Level()
    def calc_score(self):
        """Rebuild ``self.scores`` from the players' current experience.

        The list has one slot per entry in ``self.level.player_sprites``;
        each player's ``stats.exp`` is written at index ``player.player_id``.
        """
        players = self.level.player_sprites
        self.scores = [0] * len(players)
        for sprite in players:
            self.scores[sprite.player_id] = sprite.stats.exp
    def run(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
-                pygame.quit()
+                self.quit()
                sys.exit()
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_m:
                    self.level.toggle_menu()
@ -34,3 +43,7 @@ class Game:
        pygame.display.update()
        self.clock.tick(FPS)
    def quit(self):
        """Shut down pygame and terminate the process.

        Calls ``pygame.quit()`` and then ``sys.exit()``, so this method
        does not return normally (``sys.exit`` raises ``SystemExit``).
        """
        pygame.quit()
        sys.exit()
--- a/level/level.py
+++ b/level/level.py
@ -1,5 +1,6 @@
 import os
 import pygame
 import numpy as np
 from random import choice
@ -46,6 +47,7 @@ class Level:
        else:
            for player in self.player_sprites:
                player.get_max_num_states()
        self.dead_players = np.zeros(len(self.player_sprites))
        # UI setup
        self.ui = UI()
@ -174,11 +176,13 @@ class Level:
            self.visible_sprites.custom_draw(self.player)
            self.ui.display(self.player)
-        debug('v0.6')
+        debug('v0.8')
        for player in self.player_sprites:
            if player.is_dead():
-                self.done = True
+                self.dead_players[player.player_id] = True
        self.done = True if self.dead_players.all() == 1 else False
        if not self.game_paused:
            # Update the game
--- a/main.py
+++ b/main.py
@ -1,3 +1,6 @@
 import numpy as np
 import matplotlib.pyplot as plt
 from game import Game
 from tqdm import tqdm
@ -6,54 +9,67 @@ environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1'
 if __name__ == '__main__':
-    n_episodes = 3000
+
    n_episodes = 1000
    game_len = 10000
    figure_file = 'plots/score.png'
    score_history = []
    best_score = 0
    avg_score = 0
    agent_list = []
    game_len = 5000
    game = Game()
    agent_list = []
    exp_points_list = []
    score_history = np.zeros(
        shape=(len(game.level.player_sprites), n_episodes, ))
    best_score = np.zeros(len(game.level.player_sprites))
    avg_score = np.zeros(len(game.level.player_sprites))
    for i in tqdm(range(n_episodes)):
        # TODO: Make game.level.reset_map() so we don't __init__ everything all the time (such a waste)
        if i != 0:
            game.level.__init__(reset=True)
        # TODO: Make game.level.reset_map() so we don't pull out and load the agent every time (There is -definitely- a better way)
            for player in game.level.player_sprites:
-            for player_id, agent in agent_list:
+                for agent in agent_list:
-                if player.player_id == player_id:
+                    player.agent = agent_list[player.player_id]
-                    player.agent = agent
+                    player.stats.exp = score_history[player.player_id][i-1]
        agent_list = []
        done = False
        score = 0
        for _ in tqdm(range(game_len)):
            if not game.level.done:
                game.run()
            else:
                break
        for player in game.level.player_sprites:
            agent_list.append((player.player_id, player.agent))
-        if i == n_episodes-1 and game.level.enemy_sprites != []:
+        for j in range(game_len):
            if not game.level.done:
                game.run()
                game.calc_score()
                if (j == game_len-1 or game.level.done) and game.level.enemy_sprites != []:
                    for player in game.level.player_sprites:
                        for enemy in game.level.enemy_sprites:
-                    player.stats.exp -= 5
+                            player.stats.exp *= .95
-                player.update()
+            else:
                break
        for player in game.level.player_sprites:
            agent_list.append(player.agent)
            exp_points = player.stats.exp
            score_history[player.player_id][i] = exp_points
            avg_score[player.player_id] = np.mean(
                score_history[player.player_id])
            if avg_score[player.player_id] >= best_score[player.player_id]:
                player.agent.save_models()
                best_score[player.player_id] = avg_score[player.player_id]
-        # TODO: Make it so that scores appear here for each player
+            print(
-        # score_history.append(game.level.player.score)
+                f"\nCumulative score for player {player.player_id}:\
-        # print(score)
+                    {score_history[0][i]}\
-        # avg_score = np.mean(score_history[-100:])
+                    \nAverage score for player {player.player_id}:\
                    {avg_score[player.player_id]}\
                    \nBest score for player {player.player_id}:\
                    {best_score[player.player_id]}")
-        # if avg_score > best_score:
+    plt.plot(score_history[0])
-        #     best_score = avg_score
+
-        #     game.level.player.agent.save_models()
+    game.quit()
    plt.show()
--- a/tmp/ppo/actor_torch_ppo
+++ b/tmp/ppo/actor_torch_ppo
--- a/tmp/ppo/critic_torch_ppo
+++ b/tmp/ppo/critic_torch_ppo