From b4a6e99fcede1c2c38a31b96713137ebaf7c288f Mon Sep 17 00:00:00 2001
From: Vasilis Valatsos <vasilvalat@gmail.com>
Date: Tue, 14 Nov 2023 22:44:43 +0100
Subject: [PATCH] Implemented (badly) agent

---
 agent/agent.py                                |  74 ++++++------
 agent/brain.py                                | 107 ++++++++----------
 {game/configs => configs}/__init__.py         |   0
 {game/configs => configs}/game/__init__.py    |   0
 .../game/monster_config.py                    |   2 +-
 .../configs => configs}/game/player_config.py |   0
 .../configs => configs}/game/spell_config.py  |   2 +-
 .../configs => configs}/game/weapon_config.py |   2 +-
 {game/configs => configs}/system/__init__.py  |   0
 .../system/window_config.py                   |   0
 {game/effects => effects}/__init__.py         |   0
 {game/effects => effects}/magic_effects.py    |   2 +-
 {game/effects => effects}/particle_effects.py |   2 +-
 {game/effects => effects}/weapon_effects.py   |   2 +-
 {game/entities => entities}/__init__.py       |   0
 .../components/__init__.py                    |   0
 .../components/_input.py                      |   7 +-
 .../components/animaton.py                    |   2 +-
 .../entities => entities}/components/audio.py |   2 +-
 .../components/combat.py                      |   2 +-
 .../components/movement.py                    |   0
 .../entities => entities}/components/stats.py |   0
 {game/entities => entities}/enemy.py          |   1 +
 {game/entities => entities}/observer.py       |   2 +-
 {game/entities => entities}/player.py         |  49 +++++++-
 {game/interface => interface}/__init__.py     |   0
 {game/interface => interface}/ui.py           |   0
 {game/interface => interface}/ui_settings.py  |   2 +-
 {game/interface => interface}/upgrade.py      |   0
 {game/level => level}/__init__.py             |   0
 {game/level => level}/camera.py               |   2 +-
 {game/level => level}/level.py                |  42 ++++---
 {game/level => level}/terrain.py              |   0
 game/main.py => main.py                       |  46 ++++----
 {game/utils => utils}/.settings.py.kate-swp   | Bin
 {game/utils => utils}/__init__.py             |   0
 {game/utils => utils}/debug.py                |   0
 {game/utils => utils}/resource_loader.py      |   8 +-
 38 files changed, 200 insertions(+), 158 deletions(-)
 rename {game/configs => configs}/__init__.py (100%)
 rename {game/configs => configs}/game/__init__.py (100%)
 rename {game/configs => configs}/game/monster_config.py (96%)
 rename {game/configs => configs}/game/player_config.py (100%)
 rename {game/configs => configs}/game/spell_config.py (89%)
 rename {game/configs => configs}/game/weapon_config.py (94%)
 rename {game/configs => configs}/system/__init__.py (100%)
 rename {game/configs => configs}/system/window_config.py (100%)
 rename {game/effects => effects}/__init__.py (100%)
 rename {game/effects => effects}/magic_effects.py (98%)
 rename {game/effects => effects}/particle_effects.py (98%)
 rename {game/effects => effects}/weapon_effects.py (96%)
 rename {game/entities => entities}/__init__.py (100%)
 rename {game/entities => entities}/components/__init__.py (100%)
 rename {game/entities => entities}/components/_input.py (97%)
 rename {game/entities => entities}/components/animaton.py (98%)
 rename {game/entities => entities}/components/audio.py (93%)
 rename {game/entities => entities}/components/combat.py (96%)
 rename {game/entities => entities}/components/movement.py (100%)
 rename {game/entities => entities}/components/stats.py (100%)
 rename {game/entities => entities}/enemy.py (99%)
 rename {game/entities => entities}/observer.py (97%)
 rename {game/entities => entities}/player.py (71%)
 rename {game/interface => interface}/__init__.py (100%)
 rename {game/interface => interface}/ui.py (100%)
 rename {game/interface => interface}/ui_settings.py (94%)
 rename {game/interface => interface}/upgrade.py (100%)
 rename {game/level => level}/__init__.py (100%)
 rename {game/level => level}/camera.py (94%)
 rename {game/level => level}/level.py (88%)
 rename {game/level => level}/terrain.py (100%)
 rename game/main.py => main.py (84%)
 rename {game/utils => utils}/.settings.py.kate-swp (100%)
 rename {game/utils => utils}/__init__.py (100%)
 rename {game/utils => utils}/debug.py (100%)
 rename {game/utils => utils}/resource_loader.py (89%)

diff --git a/agent/agent.py b/agent/agent.py
index 1e673e2..38ff030 100644
--- a/agent/agent.py
+++ b/agent/agent.py
@@ -1,20 +1,18 @@
-import random
-import torch
+import numpy as np
+import torch as T
 
-from numpy.random import default_rng
-
-from rl.brain import ActorNetwork, CriticNetwork, PPOMemory
+from .brain import ActorNetwork, CriticNetwork, PPOMemory
 
 
 class Agent:
 
-    def __init__(self, n_actions, input_dims, gamma = 0.99, alpha = 0.0003, policy_clip = 0.2, batch_size = 64, N=2048, n_epochs = 10, gae_lambda = 0.95):
-    
+    def __init__(self, input_dims, n_actions, gamma=0.99, alpha=0.0003, policy_clip=0.2, batch_size=64, N=2048, n_epochs=10, gae_lambda=0.95):
+
         self.gamma = gamma
         self.policy_clip = policy_clip
         self.n_epochs = n_epochs
         self.gae_lambda = gae_lambda
-        
+
         print("Preparing Actor model...")
         self.actor = ActorNetwork(input_dims, n_actions, alpha)
         print(f"Actor network activated using {self.actor.device}")
@@ -22,78 +20,84 @@ class Agent:
         self.critic = CriticNetwork(input_dims, alpha)
         print(f"Critic network activated using {self.critic.device}")
         self.memory = PPOMemory(batch_size)
-        
+
     def remember(self, state, action, probs, vals, reward, done):
         self.memory.store_memory(state, action, probs, vals, reward, done)
-        
+
     def save_models(self):
         print('... saving models ...')
         self.actor.save_checkpoint()
         self.critic.save_checkpoint()
         print('... done ...')
-        
+
     def load_models(self):
-        print('... loadng models ...')
+        print('... loading models ...')
         self.actor.load_checkpoint()
         self.critic.load_checkpoint()
         print('.. done ...')
-    
+
     def choose_action(self, observation):
-        state = T.tensor([observation], dtype = T.float).to(self.actor.device)
-        
+        state = observation.to(self.actor.device, dtype=T.float)
+
         dist = self.actor(state)
         value = self.critic(state)
         action = dist.sample()
-        
+
         probs = T.squeeze(dist.log_prob(action)).item()
         action = T.squeeze(action).item()
         value = T.squeeze(value).item()
-        
+
         return action, probs, value
-        
+
     def learn(self):
         for _ in range(self.n_epochs):
             state_arr, action_arr, old_probs_arr, vals_arr, reward_arr, done_arr, batches = self.memory.generate_batches()
-            
+
             values = vals_arr
-            advantage = np.zeros(len(reward_arr), dtype = np.float32)
-            
+            advantage = np.zeros(len(reward_arr), dtype=np.float32)
+
             for t in range(len(reward_arr)-1):
                 discount = 1
                 a_t = 0
                 for k in range(t, len(reward_arr)-1):
-                    a_t += discount*(reward_arr[k] + self.gamma*values[k+1]*(1-int(dones_arr[k])) - values[k])
+                    a_t += discount * \
+                        (reward_arr[k] + self.gamma*values[k+1]
+                         * (1-int(done_arr[k])) - values[k])
                     discount *= self.gamma * self.gae_lambda
                 advantage[t] = a_t
-            advantage = T.tensor(Advantage).to(self.actor.device)
-            
+            advantage = T.tensor(advantage).to(self.actor.device)
+
             values = T.tensor(values).to(self.actor.device)
             for batch in batches:
-                states = T.tensor(state_arr[batch], dtype = T.float).to(self.actor.device)
-                old_probs = T.tensor(old_probs_arr[batch]).to(self.actor.device)
+                states = T.tensor(state_arr[batch], dtype=T.float).to(
+                    self.actor.device)
+                old_probs = T.tensor(old_probs_arr[batch]).to(
+                    self.actor.device)
                 actions = T.tensor(action_arr[batch]).to(self.actor.device)
-                
+
                 dist = self.actor(states)
                 critic_value = self.critic(states)
-                
+
                 critic_value = T.squeeze(critic_value)
-                
+
                 new_probs = dist.log_prob(actions)
                 prob_ratio = new_probs.exp() / old_probs.exp()
                 weighted_probs = advantage[batch] * prob_ratio
-                weighted_clipped_probs = T.clamp(prob_ratio, 1-self.policy_clip, 1+self.policy_clip)*advantage[batch]
-                actor_loss = -T.min(weighted_probs, weighted_clipped_probs).mean()
-                
+                weighted_clipped_probs = T.clamp(
+                    prob_ratio, 1-self.policy_clip, 1+self.policy_clip)*advantage[batch]
+                actor_loss = -T.min(weighted_probs,
+                                    weighted_clipped_probs).mean()
+
                 returns = advantage[batch] + values[batch]
                 critic_loss = (returns - critic_value)**2
                 critic_loss = critic_loss.mean()
-                
+
                 total_loss = actor_loss + 0.5*critic_loss
-                
+
                 self.actor.optimizer.zero_grad()
                 self.critic.optimizer.zero_grad()
                 total_loss.backward()
                 self.actor.optimizer.step()
                 self.critic.optimizer.step()
-                
+
         self.memory.clear_memory()
diff --git a/agent/brain.py b/agent/brain.py
index a311b0d..3340340 100644
--- a/agent/brain.py
+++ b/agent/brain.py
@@ -5,6 +5,7 @@ import torch.nn as nn
 import torch.optim as optim
 from torch.distributions.categorical import Categorical
 
+
 class PPOMemory:
     def __init__(self, batch_size):
         self.states = []
@@ -13,24 +14,24 @@ class PPOMemory:
         self.actions = []
         self.rewards = []
         self.dones = []
-        
+
         self.batch_size = batch_size
-    
+
     def generate_batches(self):
 
         n_states = len(self.states)
         batch_start = np.arange(0, n_states, self.batch_size)
-        indices = np.arange(n_states, dtype = np.int64)
+        indices = np.arange(n_states, dtype=np.int64)
         np.random.shuffle(indices)
         batches = [indices[i:i+self.batch_size] for i in batch_start]
-        
-        return  np.array(self.states),\
-                np.array(self.actions),\
-                np.array(self.probs),\
-                np.array(self.vals),\
-                np.array(self.rewards),\
-                np.array(self.dones),\
-                batches
+
+        return np.array(self.states),\
+            np.array(self.actions),\
+            np.array(self.probs),\
+            np.array(self.vals),\
+            np.array(self.rewards),\
+            np.array(self.dones),\
+            batches
 
     def store_memory(self, state, action, probs, vals, reward, done):
         self.states.append(state)
@@ -38,7 +39,7 @@ class PPOMemory:
         self.vals.append(vals)
         self.rewards.append(reward)
         self.dones.append(done)
-        
+
     def clear_memory(self):
         self.states = []
         self.probs = []
@@ -47,81 +48,69 @@ class PPOMemory:
         self.rewards = []
         self.dones = []
 
+
 class ActorNetwork(nn.Module):
 
-    def __init__(self, input_dim, output_dim, alpha, fc1_dims = 256, fc2_dims = 256, chkpt_dir = 'tmp/ppo'):
+    def __init__(self, input_dim, output_dim, alpha, fc1_dims=256, fc2_dims=256, chkpt_dir='tmp/ppo'):
         super(ActorNetwork, self).__init__()
-        
+
         self.checkpoint_file = os.path.join(chkpt_dir, 'actor_torch_ppo')
         self.actor = nn.Sequential(
-                        nn.Linear(len(input_dim), fc1_dims),
-                        nn.ReLU(),
-                        nn.Linear(fc1_dims, fc2_dims),
-                        nn.ReLU(),
-                        nn.Linear(fc2_dims, len(output_dim)),
-                        nn.Softmax(dim=-1)
-                        )
-        
+            nn.Linear(input_dim, fc1_dims),
+            nn.ReLU(),
+            nn.Linear(fc1_dims, fc2_dims),
+            nn.ReLU(),
+            nn.Linear(fc2_dims, output_dim),
+            nn.Softmax(dim=-1)
+        )
+
         self.optimizer = optim.Adam(self.parameters(), lr=alpha)
-        
-        self.device = T.device('cuda:0' if T.cuda.is_available() else ('mps' if T.backends.mps.is_available() else 'cpu'))
- 
+
+        self.device = T.device('cuda:0' if T.cuda.is_available() else (
+            'mps' if T.backends.mps.is_available() else 'cpu'))
+
         self.to(self.device)
 
     def forward(self, state):
         dist = self.actor(state)
         dist = Categorical(dist)
-        
+
         return dist
 
     def save_checkpoint(self):
         T.save(self.state_dict(), self.checkpoint_file)
-        
+
     def load_checkpoint(self):
         self.load_state_dict(T.load(self.checkpoint_file))
 
+
 class CriticNetwork(nn.Module):
 
-    def __init__(self, input_dims, alpha, fc1_dims = 256, fc2_dims = 256, chkpt_dir = 'tmp/ppo'):
+    def __init__(self, input_dims, alpha, fc1_dims=256, fc2_dims=256, chkpt_dir='tmp/ppo'):
         super(CriticNetwork, self).__init__()
-        
+
         self.checkpoint_file = os.path.join(chkpt_dir, 'critic_torch_ppo')
         self.critic = nn.Sequential(
-                        nn.Linear(len(input_dims), fc1_dims),
-                        nn.ReLU(),
-                        nn.Linear(fc1_dims, fc2_dims),
-                        nn.ReLU(),
-                        nn.Linear(fc2_dims, 1)
-                        )
-        
+            nn.Linear(input_dims, fc1_dims),
+            nn.ReLU(),
+            nn.Linear(fc1_dims, fc2_dims),
+            nn.ReLU(),
+            nn.Linear(fc2_dims, 1)
+        )
+
         self.optimizer = optim.Adam(self.parameters(), lr=alpha)
-        self.device = T.device('cuda:0' if T.cuda.is_available() else ('mps' if T.backends.mps.is_available() else 'cpu'))
-        
+        self.device = T.device('cuda:0' if T.cuda.is_available() else (
+            'mps' if T.backends.mps.is_available() else 'cpu'))
+
         self.to(self.device)
-        
+
     def forward(self, state):
-        vale = self.critic(state)
-        
+        value = self.critic(state)
+
         return value
-        
+
     def save_checkpoint(self):
         T.save(self.state_dict(), self.checkpoint_file)
-        
+
     def load_checkpoint(self):
         self.load_state_dict(T.load(self.checkpoint_file))
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
diff --git a/game/configs/__init__.py b/configs/__init__.py
similarity index 100%
rename from game/configs/__init__.py
rename to configs/__init__.py
diff --git a/game/configs/game/__init__.py b/configs/game/__init__.py
similarity index 100%
rename from game/configs/game/__init__.py
rename to configs/game/__init__.py
diff --git a/game/configs/game/monster_config.py b/configs/game/monster_config.py
similarity index 96%
rename from game/configs/game/monster_config.py
rename to configs/game/monster_config.py
index 023fae1..22a5ca3 100644
--- a/game/configs/game/monster_config.py
+++ b/configs/game/monster_config.py
@@ -3,7 +3,7 @@ import os
 
 script_dir = os.path.dirname(os.path.abspath(__file__))
 asset_path = os.path.join(
-    script_dir, '../../..', 'assets')
+    script_dir, '../..', 'assets')
 
 monster_data = {
     'squid': {'id': 1, 'health': 100, 'exp': 100, 'attack': 20, 'attack_type': 'slash', 'attack_sound': f'{asset_path}/audio/attack/slash.wav', 'speed': 3, 'knockback': 20, 'attack_radius': 80, 'notice_radius': 360},
diff --git a/game/configs/game/player_config.py b/configs/game/player_config.py
similarity index 100%
rename from game/configs/game/player_config.py
rename to configs/game/player_config.py
diff --git a/game/configs/game/spell_config.py b/configs/game/spell_config.py
similarity index 89%
rename from game/configs/game/spell_config.py
rename to configs/game/spell_config.py
index 0a4d8df..cab1efb 100644
--- a/game/configs/game/spell_config.py
+++ b/configs/game/spell_config.py
@@ -2,7 +2,7 @@ import os
 
 script_dir = os.path.dirname(os.path.abspath(__file__))
 asset_path = os.path.join(
-    script_dir, '../../..', 'assets')
+    script_dir, '../..', 'assets')
 
 magic_data = {
     'flame': {'strength': 5, 'cost': 20, 'graphic': f"{asset_path}/graphics/particles/flame/fire.png"},
diff --git a/game/configs/game/weapon_config.py b/configs/game/weapon_config.py
similarity index 94%
rename from game/configs/game/weapon_config.py
rename to configs/game/weapon_config.py
index ab4df40..123dda4 100644
--- a/game/configs/game/weapon_config.py
+++ b/configs/game/weapon_config.py
@@ -2,7 +2,7 @@ import os
 
 script_dir = os.path.dirname(os.path.abspath(__file__))
 asset_path = os.path.join(
-    script_dir, '../../..', 'assets')
+    script_dir, '../..', 'assets')
 
 weapon_data = {
     'sword': {'cooldown': 100, 'damage': 15, 'graphic': f"{asset_path}/graphics/weapons/sword/full.png"},
diff --git a/game/configs/system/__init__.py b/configs/system/__init__.py
similarity index 100%
rename from game/configs/system/__init__.py
rename to configs/system/__init__.py
diff --git a/game/configs/system/window_config.py b/configs/system/window_config.py
similarity index 100%
rename from game/configs/system/window_config.py
rename to configs/system/window_config.py
diff --git a/game/effects/__init__.py b/effects/__init__.py
similarity index 100%
rename from game/effects/__init__.py
rename to effects/__init__.py
diff --git a/game/effects/magic_effects.py b/effects/magic_effects.py
similarity index 98%
rename from game/effects/magic_effects.py
rename to effects/magic_effects.py
index 11ba981..d0f0904 100644
--- a/game/effects/magic_effects.py
+++ b/effects/magic_effects.py
@@ -10,7 +10,7 @@ class MagicPlayer:
         self.animation_player = animation_player
         script_dir = os.path.dirname(os.path.abspath(__file__))
         asset_path = os.path.join(
-            script_dir, '../..', 'assets')
+            script_dir, '..', 'assets')
 
         # Sound Setup
         self.sounds = {
diff --git a/game/effects/particle_effects.py b/effects/particle_effects.py
similarity index 98%
rename from game/effects/particle_effects.py
rename to effects/particle_effects.py
index 4cc8fcf..2203134 100644
--- a/game/effects/particle_effects.py
+++ b/effects/particle_effects.py
@@ -10,7 +10,7 @@ class AnimationPlayer:
 
         script_dir = os.path.dirname(os.path.abspath(__file__))
         asset_path = os.path.join(
-            script_dir, '../..', 'assets')
+            script_dir, '..', 'assets')
 
         self.frames = {
             # magic
diff --git a/game/effects/weapon_effects.py b/effects/weapon_effects.py
similarity index 96%
rename from game/effects/weapon_effects.py
rename to effects/weapon_effects.py
index 5f709b1..6a1997d 100644
--- a/game/effects/weapon_effects.py
+++ b/effects/weapon_effects.py
@@ -9,7 +9,7 @@ class Weapon(pygame.sprite.Sprite):
 
         script_dir = os.path.dirname(os.path.abspath(__file__))
         asset_path = os.path.join(
-            script_dir, '../..', 'assets')
+            script_dir, '..', 'assets')
 
         self.sprite_type = 'weapon'
         direction = player._input.status.split('_')[0]
diff --git a/game/entities/__init__.py b/entities/__init__.py
similarity index 100%
rename from game/entities/__init__.py
rename to entities/__init__.py
diff --git a/game/entities/components/__init__.py b/entities/components/__init__.py
similarity index 100%
rename from game/entities/components/__init__.py
rename to entities/components/__init__.py
diff --git a/game/entities/components/_input.py b/entities/components/_input.py
similarity index 97%
rename from game/entities/components/_input.py
rename to entities/components/_input.py
index 6cdbd53..d54efdd 100644
--- a/game/entities/components/_input.py
+++ b/entities/components/_input.py
@@ -36,18 +36,15 @@ class InputHandler:
         self.magic_swap_time = None
 
         # Setup Action Space
+        self.num_actions = 7
         self.action = 10
 
-    def check_input(self, speed, hitbox, obstacle_sprites, rect, player):
+    def check_input(self, button, speed, hitbox, obstacle_sprites, rect, player):
 
         self.action = 10
 
         if not self.attacking and self.can_move:
 
-            keys = pygame.key.get_pressed()
-
-            button = randint(0, 4)
-
             self.move_time = pygame.time.get_ticks()
 
             # Movement Input
diff --git a/game/entities/components/animaton.py b/entities/components/animaton.py
similarity index 98%
rename from game/entities/components/animaton.py
rename to entities/components/animaton.py
index efc0d03..f1148c4 100644
--- a/game/entities/components/animaton.py
+++ b/entities/components/animaton.py
@@ -19,7 +19,7 @@ class AnimationHandler:
     def import_assets(self, position):
         script_dir = os.path.dirname(os.path.abspath(__file__))
         asset_path = os.path.join(
-            script_dir, '../../..', 'assets', 'graphics')
+            script_dir, '../..', 'assets', 'graphics')
 
         if self.sprite_type == 'player':
 
diff --git a/game/entities/components/audio.py b/entities/components/audio.py
similarity index 93%
rename from game/entities/components/audio.py
rename to entities/components/audio.py
index f8a41b5..9845770 100644
--- a/game/entities/components/audio.py
+++ b/entities/components/audio.py
@@ -9,7 +9,7 @@ class AudioHandler:
     def __init__(self, sprite_type, monster_name=None):
         script_dir = os.path.dirname(os.path.abspath(__file__))
         asset_path = os.path.join(
-            script_dir, '../../..', 'assets', 'audio')
+            script_dir, '../..', 'assets', 'audio')
 
         if sprite_type == 'player':
             pass
diff --git a/game/entities/components/combat.py b/entities/components/combat.py
similarity index 96%
rename from game/entities/components/combat.py
rename to entities/components/combat.py
index e7babd6..e432783 100644
--- a/game/entities/components/combat.py
+++ b/entities/components/combat.py
@@ -33,7 +33,7 @@ class CombatHandler:
         # Import Sounds
         script_dir = os.path.dirname(os.path.abspath(__file__))
         asset_path = os.path.join(
-            script_dir, '../../..', 'assets', 'audio')
+            script_dir, '../..', 'assets', 'audio')
 
         self.weapon_attack_sound = pygame.mixer.Sound(
             f"{asset_path}/sword.wav")
diff --git a/game/entities/components/movement.py b/entities/components/movement.py
similarity index 100%
rename from game/entities/components/movement.py
rename to entities/components/movement.py
diff --git a/game/entities/components/stats.py b/entities/components/stats.py
similarity index 100%
rename from game/entities/components/stats.py
rename to entities/components/stats.py
diff --git a/game/entities/enemy.py b/entities/enemy.py
similarity index 99%
rename from game/entities/enemy.py
rename to entities/enemy.py
index fee03ae..7c96958 100644
--- a/game/entities/enemy.py
+++ b/entities/enemy.py
@@ -17,6 +17,7 @@ class Enemy(pygame.sprite.Sprite):
         self.name = name
         self.visible_sprites = visible_sprites
 
+        self.position = position
         # Setup Graphics
         self.audio = AudioHandler(self.sprite_type, self.name)
         self.animation_player = AnimationPlayer()
diff --git a/game/entities/observer.py b/entities/observer.py
similarity index 97%
rename from game/entities/observer.py
rename to entities/observer.py
index 7b3a70e..1578bb9 100644
--- a/game/entities/observer.py
+++ b/entities/observer.py
@@ -13,7 +13,7 @@ class Observer(pygame.sprite.Sprite):
 
         script_dir = os.path.dirname(os.path.abspath(__file__))
         asset_path = os.path.join(
-            script_dir, '../..', 'assets')
+            script_dir, '..', 'assets')
 
         self.image = pygame.image.load(
             f"{asset_path}/graphics/observer.png").convert_alpha()
diff --git a/game/entities/player.py b/entities/player.py
similarity index 71%
rename from game/entities/player.py
rename to entities/player.py
index bdb666e..d9679ca 100644
--- a/game/entities/player.py
+++ b/entities/player.py
@@ -10,14 +10,18 @@ from .components.animaton import AnimationHandler
 
 from effects.particle_effects import AnimationPlayer
 
+from agent.agent import Agent
+
 
 class Player(pygame.sprite.Sprite):
 
-    def __init__(self, position, groups, obstacle_sprites, visible_sprites, attack_sprites, attackable_sprites, role):
+    def __init__(self, position, groups, obstacle_sprites, visible_sprites, attack_sprites, attackable_sprites, role, player_id, extract_features, convert_features_to_tensor):
         super().__init__(groups)
 
         # Setup Sprites
         self.sprite_type = 'player'
+        self.status = 'down'
+        self.player_id = player_id
         self.visible_sprites = visible_sprites
         self.attack_sprites = attack_sprites
         self.obstacle_sprites = obstacle_sprites
@@ -40,6 +44,14 @@ class Player(pygame.sprite.Sprite):
 
         self.distance_direction_from_enemy = None
 
+        # Setup AI
+        self.extract_features = extract_features
+        self.convert_features_to_tensor = convert_features_to_tensor
+        self.agent = Agent(input_dims=398, n_actions=self._input.num_actions)
+        self.state_tensor = None
+        self.action_tensor = None
+        self.reward_tensor = None
+
     def get_status(self):
         if self._input.movement.direction.x == 0 and self._input.movement.direction.y == 0:
             if 'idle' not in self.status and 'attack' not in self.status:
@@ -85,10 +97,41 @@ class Player(pygame.sprite.Sprite):
         spell_damage = magic_data[self._input.combat.magic]['strength']
         return (base_damage + spell_damage)
 
+    def get_current_state(self):
+        pass
+
+    def is_dead(self):
+        # <= 0, not == 0: health that overshoots below zero must still count as dead.
+        if self.stats.health <= 0:
+            self.stats.exp = -10
+            return True
+        return False
+
     def update(self):
+        self.extract_features()
+        self.convert_features_to_tensor()
+
+        # Choose action based on current state
+        action, probs, value = self.agent.choose_action(self.state_tensor)
+
+        print(action)
+        # Apply chosen action
+        self._input.check_input(action, self.stats.speed, self.animation.hitbox,
+                                self.obstacle_sprites, self.animation.rect, self)
+
+        done = self.is_dead()
+
+        self.extract_features()
+        self.convert_features_to_tensor()
+
+        self.agent.remember(self.state_tensor, self.action_tensor,
+                            probs, value, self.reward_tensor, done)
+
+        if done:
+            self.agent.learn()
+            self.agent.memory.clear_memory()
+
         # Refresh objects based on input
-        self._input.check_input(
-            self.stats.speed, self.animation.hitbox, self.obstacle_sprites, self.animation.rect, self)
         self.status = self._input.status
 
         # Animate
diff --git a/game/interface/__init__.py b/interface/__init__.py
similarity index 100%
rename from game/interface/__init__.py
rename to interface/__init__.py
diff --git a/game/interface/ui.py b/interface/ui.py
similarity index 100%
rename from game/interface/ui.py
rename to interface/ui.py
diff --git a/game/interface/ui_settings.py b/interface/ui_settings.py
similarity index 94%
rename from game/interface/ui_settings.py
rename to interface/ui_settings.py
index cfa39fe..eb6bfa6 100644
--- a/game/interface/ui_settings.py
+++ b/interface/ui_settings.py
@@ -2,7 +2,7 @@ import os
 
 script_dir = os.path.dirname(os.path.abspath(__file__))
 asset_path = os.path.join(
-    script_dir, '../..', 'assets')
+    script_dir, '..', 'assets')
 
 # ui
 BAR_HEIGHT = 20
diff --git a/game/interface/upgrade.py b/interface/upgrade.py
similarity index 100%
rename from game/interface/upgrade.py
rename to interface/upgrade.py
diff --git a/game/level/__init__.py b/level/__init__.py
similarity index 100%
rename from game/level/__init__.py
rename to level/__init__.py
diff --git a/game/level/camera.py b/level/camera.py
similarity index 94%
rename from game/level/camera.py
rename to level/camera.py
index d991a62..49f52b9 100644
--- a/game/level/camera.py
+++ b/level/camera.py
@@ -16,7 +16,7 @@ class Camera(pygame.sprite.Group):
         # Creating the floor
         script_dir = os.path.dirname(os.path.abspath(__file__))
         image_path = os.path.join(
-            script_dir, '../..', 'assets', 'graphics', 'tilemap', 'ground.png')
+            script_dir, '..', 'assets', 'graphics', 'tilemap', 'ground.png')
 
         self.floor_surf = pygame.image.load(image_path).convert()
         self.floor_rect = self.floor_surf.get_rect(topleft=(0, 0))
diff --git a/game/level/level.py b/level/level.py
similarity index 88%
rename from game/level/level.py
rename to level/level.py
index 471eaec..6f98f2c 100644
--- a/game/level/level.py
+++ b/level/level.py
@@ -9,7 +9,6 @@ from utils.debug import debug
 from utils.resource_loader import import_csv_layout, import_folder
 
 from interface.ui import UI
-from interface.upgrade import Upgrade
 
 from entities.observer import Observer
 from entities.player import Player
@@ -21,11 +20,16 @@ from .camera import Camera
 
 class Level:
 
-    def __init__(self):
+    def __init__(self, extract_features,
+                 convert_features_to_tensor):
 
         # General Settings
         self.game_paused = False
 
+        # AI setup
+        self.extract_features = extract_features
+        self.convert_features_to_tensor = convert_features_to_tensor
+
         # Get display surface
         self.display_surface = pygame.display.get_surface()
 
@@ -37,18 +41,17 @@ class Level:
 
         # Sprite setup and entity generation
         self.create_map()
-
-        # UI setup
-        self.ui = UI()
-        # self.upgrade = Upgrade(self.player)
-
         self.get_players_enemies()
         self.get_distance_direction()
 
+        # UI setup
+        self.ui = UI()
+
     def create_map(self):
+        player_id = 0
         script_dir = os.path.dirname(os.path.abspath(__file__))
         asset_path = os.path.join(
-            script_dir, '../..', 'assets')
+            script_dir, '..', 'assets')
         layouts = {
             'boundary': import_csv_layout(f"{asset_path}/map/FloorBlocks.csv"),
             'grass': import_csv_layout(f"{asset_path}/map/Grass.csv"),
@@ -89,17 +92,20 @@ class Level:
                             elif col == '400':
                                 # Player Generation
                                 Player(
-                                    (x, y), [self.visible_sprites], self.obstacle_sprites, self.visible_sprites, self.attack_sprites, self.attackable_sprites, 'tank')
+                                    (x, y), [self.visible_sprites], self.obstacle_sprites, self.visible_sprites, self.attack_sprites, self.attackable_sprites, 'tank', player_id, self.extract_features, self.convert_features_to_tensor)
+                                player_id += 1
 
                             elif col == '401':
                                 # Player Generation
                                 Player(
-                                    (x, y), [self.visible_sprites], self.obstacle_sprites, self.visible_sprites, self.attack_sprites, self.attackable_sprites, 'warrior')
+                                    (x, y), [self.visible_sprites], self.obstacle_sprites, self.visible_sprites, self.attack_sprites, self.attackable_sprites, 'warrior', player_id, self.extract_features, self.convert_features_to_tensor)
+                                player_id += 1
 
                             elif col == '402':
                                 # Player Generation
                                 Player(
-                                    (x, y), [self.visible_sprites], self.obstacle_sprites, self.visible_sprites, self.attack_sprites, self.attackable_sprites, 'mage')
+                                    (x, y), [self.visible_sprites], self.obstacle_sprites, self.visible_sprites, self.attack_sprites, self.attackable_sprites, 'mage', player_id, self.extract_features, self.convert_features_to_tensor)
+                                player_id += 1
 
                             else:
                                 # Monster Generation
@@ -167,6 +173,14 @@ class Level:
 
         debug('v0.6')
 
+        for player in self.player_sprites:
+            if player.is_dead():
+                print(player.stats.health)
+                player.kill()
+
+        if self.player_sprites == []:
+            self.__init__()
+
         if not self.game_paused:
             # Update the game
             for player in self.player_sprites:
@@ -177,11 +191,5 @@ class Level:
             self.apply_damage_to_player()
             self.visible_sprites.update()
 
-            # self.visible_sprites.enemy_update(self.player)
-            # self.player_attack_logic()
         else:
             debug('PAUSED')
-
-        for player in self.player_sprites:
-            if player.stats.health <= 0:
-                player.kill()
diff --git a/game/level/terrain.py b/level/terrain.py
similarity index 100%
rename from game/level/terrain.py
rename to level/terrain.py
diff --git a/game/main.py b/main.py
similarity index 84%
rename from game/main.py
rename to main.py
index 960e2a8..ec28e8d 100644
--- a/game/main.py
+++ b/main.py
@@ -19,10 +19,11 @@ class Game:
         pygame.display.set_caption('Pneuma')
         self.clock = pygame.time.Clock()
 
-        self.level = Level()
+        self.level = Level(self.extract_features,
+                           self.convert_features_to_tensor)
 
         # Sound
-        main_sound = pygame.mixer.Sound('../assets/audio/main.ogg')
+        main_sound = pygame.mixer.Sound('assets/audio/main.ogg')
         main_sound.set_volume(0.4)
         main_sound.play(loops=-1)
 
@@ -35,14 +36,17 @@ class Game:
         for i, player in enumerate(self.level.player_sprites):
 
             player_action_features = {
+                "player_id": player.player_id,
                 "player_action": player._input.action
-                }
+            }
 
             player_reward_features = {
+                "player_id": player.player_id,
                 "player_exp": player.stats.exp
-                }
+            }
 
             player_state_features = {
+                "player_id": player.player_id,
                 "player_position": player.rect.center,
                 "player role": player.stats.role_id,
                 "player_health": player.stats.health,
@@ -80,10 +84,6 @@ class Game:
 
     def convert_features_to_tensor(self):
 
-        self.state_tensors = []
-        self.action_tensors = []
-        self.reward_tensors = []
-
         for features in self.state_features:
             info_array = []
 
@@ -123,10 +123,11 @@ class Game:
                 info_array.extend(enemy_info)
 
             state_tensor = torch.tensor(
-            np.array(info_array, dtype=np.float32))
-
-            self.state_tensors.append(state_tensor)
+                np.array(info_array, dtype=np.float32))
 
+            for player in self.level.player_sprites:
+                if player.player_id == features["player_id"]:
+                    player.state_tensor = state_tensor
 
         for features in self.action_features:
             info_array = []
@@ -134,12 +135,14 @@ class Game:
             # Adding action features
             action_info = [
                 features["player_action"]
-                ]
+            ]
 
             action_tensor = torch.tensor(
                 np.array(action_info, dtype=np.float32))
 
-            self.action_tensors.append(action_tensor)
+            for player in self.level.player_sprites:
+                if player.player_id == features["player_id"]:
+                    player.action_tensor = action_tensor
 
         for features in self.reward_features:
             info_array = []
@@ -147,12 +150,14 @@ class Game:
             # Adding reward features
             reward_info = [
                 features["player_exp"]
-                ]
+            ]
 
             reward_tensor = torch.tensor(
                 np.array(reward_info, dtype=np.float32))
 
-            self.reward_tensors.append(reward_tensor)
+            for player in self.level.player_sprites:
+                if player.player_id == features["player_id"]:
+                    player.reward_tensor = reward_tensor
 
     def run(self):
 
@@ -166,10 +171,8 @@ class Game:
 
         self.screen.fill(WATER_COLOR)
 
-        self.extract_features()
-        self.convert_features_to_tensor()
+        self.level.run(who='observer')
 
-        self.level.run('observer')
         pygame.display.update()
         self.clock.tick(FPS)
 
@@ -179,9 +182,4 @@ if __name__ == '__main__':
     game = Game()
     for i in range(0, 10000):
         game.run()
-        game.extract_features()
-        game.convert_features_to_tensor()
-        if i == 100:
-            print(game.reward_tensors)
-            print(game.action_tensors)
-            print(game.state_tensors)
+        print(i)
diff --git a/game/utils/.settings.py.kate-swp b/utils/.settings.py.kate-swp
similarity index 100%
rename from game/utils/.settings.py.kate-swp
rename to utils/.settings.py.kate-swp
diff --git a/game/utils/__init__.py b/utils/__init__.py
similarity index 100%
rename from game/utils/__init__.py
rename to utils/__init__.py
diff --git a/game/utils/debug.py b/utils/debug.py
similarity index 100%
rename from game/utils/debug.py
rename to utils/debug.py
diff --git a/game/utils/resource_loader.py b/utils/resource_loader.py
similarity index 89%
rename from game/utils/resource_loader.py
rename to utils/resource_loader.py
index e52f2bc..6436d75 100644
--- a/game/utils/resource_loader.py
+++ b/utils/resource_loader.py
@@ -2,17 +2,19 @@ import pygame
 from csv import reader
 from os import walk
 
+
 def import_csv_layout(path):
     terrain_map = []
     with open(path) as level_map:
-        layout = reader(level_map, delimiter = ',')
+        layout = reader(level_map, delimiter=',')
         for row in layout:
             terrain_map.append(list(row))
     return terrain_map
-    
+
+
 def import_folder(path):
     surface_list = []
-    
+
     for _, __, img_files in walk(path):
         for image in img_files:
             full_path = f"{path}/{image}"