diff --git a/agents/ppo/agent.py b/agents/ppo/agent.py
index c8aeade..0f28d7d 100644
--- a/agents/ppo/agent.py
+++ b/agents/ppo/agent.py
@@ -1,13 +1,15 @@
 import numpy as np
 import torch as T
+from tqdm import tqdm
+
 from .brain import ActorNetwork, CriticNetwork, PPOMemory
 
 
 class Agent:
     def __init__(self, input_dims, n_actions, gamma=0.99, alpha=0.0003,
-                 policy_clip=0.2, batch_size=64, N=2048, n_epochs=10,
+                 policy_clip=0.2, batch_size=64, n_epochs=10,
                  gae_lambda=0.95, entropy_coef=0.001, chkpt_dir='tmp/ppo'):
 
         self.gamma = gamma
@@ -50,7 +52,12 @@ class Agent:
         return action, probs, value
 
     def learn(self):
-        for _ in range(self.n_epochs):
+        for _ in tqdm(range(self.n_epochs),
+                      desc='Learning...',
+                      dynamic_ncols=True,
+                      leave=False,
+                      ascii=True):
+
             state_arr, action_arr, old_probs_arr, vals_arr, reward_arr, dones_arr, batches = self.memory.generate_batches()
 
             values = vals_arr
@@ -102,11 +109,11 @@ class Agent:
             self.critic.optimizer.zero_grad()
             self.total_loss.backward()
 
-            # T.nn.utils.clip_grad_norm_(
-            #     self.actor.parameters(), max_norm=2)
-            #
-            # T.nn.utils.clip_grad_norm_(
-            #     self.critic.parameters(), max_norm=2)
+            T.nn.utils.clip_grad_norm_(
+                self.actor.parameters(), max_norm=2)
+
+            T.nn.utils.clip_grad_norm_(
+                self.critic.parameters(), max_norm=2)
             #
             # # Calculate the gradient norms for both networks
             # actor_grad_norm = T.nn.utils.clip_grad_norm_(
diff --git a/agents/ppo/brain.py b/agents/ppo/brain.py
index 6dc02b9..3e52a99 100644
--- a/agents/ppo/brain.py
+++ b/agents/ppo/brain.py
@@ -59,9 +59,9 @@ class ActorNetwork(nn.Module):
 
         self.actor = nn.Sequential(
             nn.Linear(input_dim, fc1_dims),
-            nn.ReLU(),
+            nn.LeakyReLU(),
             nn.Linear(fc1_dims, fc2_dims),
-            nn.ReLU(),
+            nn.LeakyReLU(),
             nn.Linear(fc2_dims, output_dim),
             nn.Softmax(dim=-1)
         )
diff --git a/camera.py b/camera.py
index 12ee9c5..842fe1b 100644
--- a/camera.py
+++ b/camera.py
@@ -15,11 +15,6 @@ class Camera(pygame.sprite.Group):
         self.half_height = self.display_surface.get_size()[1] // 2
         self.offset = pygame.math.Vector2(100, 200)
 
-        # Creating the floor
-        image_path = import_assets(os.path.join('graphics',
-                                                'tilemap',
-                                                'ground.png'))
-
         self.floor_surf = pygame.image.load(
             import_assets(
                 os.path.join('graphics',
diff --git a/configs/game/monster_config.py b/configs/game/monster_config.py
index cf1c451..14d5344 100644
--- a/configs/game/monster_config.py
+++ b/configs/game/monster_config.py
@@ -30,7 +30,7 @@ monster_data = {
                'notice_radius': 350},
 
     'bamboo': {'id': 4,
-               'health': 70,
+               'health': 50,
               'exp': 9,
               'attack': 20,
               'attack_type': 'leaf_attack',
diff --git a/configs/game/player_config.py b/configs/game/player_config.py
index a3471a9..940e576 100644
--- a/configs/game/player_config.py
+++ b/configs/game/player_config.py
@@ -1,10 +1,10 @@
 tank_stats = {
     'role_id': 1,
     'health': 150,
-    'energy': 40,
+    'energy': 70,
     'attack': 10,
-    'magic': 3,
-    'speed': 3
+    'magic': 5,
+    'speed': 5
 }
 
 mage_stats = {
diff --git a/entities/player.py b/entities/player.py
index ca2641e..1260052 100644
--- a/entities/player.py
+++ b/entities/player.py
@@ -57,7 +57,6 @@ class Player(pygame.sprite.Sprite):
                     alpha,
                     policy_clip,
                     batch_size,
-                    N,
                     n_epochs,
                     gae_lambda,
                     chkpt_dir,
@@ -75,7 +74,6 @@ class Player(pygame.sprite.Sprite):
             alpha=alpha,
             policy_clip=policy_clip,
             batch_size=batch_size,
-            N=N,
             n_epochs=n_epochs,
             gae_lambda=gae_lambda,
             entropy_coef=entropy_coef,
@@ -168,32 +166,32 @@ class Player(pygame.sprite.Sprite):
 
         self.action_features = [self._input.action]
 
-        # self.reward = [
-        #     np.log(1 + self.stats.exp),
-        #
-        #     fermi(nearest_dist, 50),
-        #
-        #     - fermi(
-        #         nearest_enemy.stats.health,
-        #         nearest_enemy.stats.monster_info['health']
-        #     ),
-        #
-        #     maxwell(
-        #         len(self.distance_direction_from_enemy),
-        #         self.max_num_enemies
-        #     ) - 1,
-        #
-        #     - fermi(
-        #         self.stats.health,
-        #         self.stats.stats['health']
-        #     ),
-        # ]
+        self.reward = [
+            np.log(1 + self.stats.exp) if self.stats.exp >= 0 else -10,
 
-        self.reward = self.stats.exp\
-            + self.stats.health/self.stats.stats['health'] - 1\
-            - nearest_dist/np.sqrt(np.sum(self.map_edge))\
-            - nearest_enemy.stats.health/nearest_enemy.stats.monster_info['health']\
-            - len(self.distance_direction_from_enemy)/self.max_num_enemies
+            fermi(nearest_dist, 300),
+
+            fermi(
+                nearest_enemy.stats.health,
+                nearest_enemy.stats.monster_info['health']
+            ),
+
+            maxwell(
+                len(self.distance_direction_from_enemy),
+                self.max_num_enemies
+            ) - 1,
+
+            - fermi(
+                self.stats.health,
+                self.stats.stats['health']
+            )
+        ]
+
+        # self.reward = self.stats.exp\
+        #     + self.stats.health/self.stats.stats['health'] - 1\
+        #     - nearest_dist/np.sqrt(np.sum(self.map_edge))\
+        #     - nearest_enemy.stats.health/nearest_enemy.stats.monster_info['health']\
+        #     - 2*len(self.distance_direction_from_enemy)/self.max_num_enemies
 
         self.state_features = [
             self.animation.rect.center[0]/self.map_edge[0],
diff --git a/figures/actor_loss.png b/figures/actor_loss.png
index 031ad58..bf947fd 100644
Binary files a/figures/actor_loss.png and b/figures/actor_loss.png differ
diff --git a/figures/critic_loss.png b/figures/critic_loss.png
index c6cac6b..19147f6 100644
Binary files a/figures/critic_loss.png and b/figures/critic_loss.png differ
diff --git a/figures/score.png b/figures/score.png
index f025931..5fb8a5f 100644
Binary files a/figures/score.png and b/figures/score.png differ
diff --git a/figures/total_loss.png b/figures/total_loss.png
index 2ff68c3..766f492 100644
Binary files a/figures/total_loss.png and b/figures/total_loss.png differ
diff --git a/pneuma.py b/pneuma.py
index 2f2db6e..49bab8d 100644
--- a/pneuma.py
+++ b/pneuma.py
@@ -75,7 +75,7 @@ if __name__ == "__main__":
 
     parser.add_argument('--entropy',
                         type=float,
-                        default=0.001,
+                        default=0.01,
                         help="The entropy coefficient")
 
     parser.add_argument('--alpha',
@@ -139,13 +139,13 @@ if __name__ == "__main__":
     game = Game(show_pg=show_pygame, n_players=n_players)
 
     print("Initializing agents ...")
-    for player in game.level.player_sprites:
+    for player in tqdm(game.level.player_sprites,
+                       dynamic_ncols=True):
         player.setup_agent(
             gamma=args.gamma,
             alpha=args.alpha,
             policy_clip=args.policy_clip,
             batch_size=args.batch_size,
-            N=args.horizon,
             n_epochs=args.n_epochs,
             gae_lambda=args.gae_lambda,
             entropy_coef=args.entropy,
@@ -157,9 +157,11 @@ if __name__ == "__main__":
 
     for episode in tqdm(range(n_episodes), dynamic_ncols=True):
 
-        # This handles agent continuity, as well as score persistence
         game.level.reset()
 
+        episode_reward = np.zeros(
+            shape=(n_players, episode_length))
+
         episode_actor_loss = np.zeros(
             shape=(n_players, learnings_per_episode))
 
@@ -177,8 +179,13 @@ if __name__ == "__main__":
             if not game.level.done:
                 game.run()
 
-                if step % horizon == 0:
-                    for player in game.level.player_sprites:
+
+                for player in game.level.player_sprites:
+
+                    episode_reward[player.player_id][step] = np.mean(
+                        player.reward)
+
+                    if (step % horizon == 0 and step != 0) or player.is_dead():
 
                         player.agent.learn()
 
@@ -196,10 +203,10 @@ if __name__ == "__main__":
 
         # Gather information about the episode
        for player in game.level.player_sprites:
-            score = player.reward
+            score = np.mean(episode_reward[player.player_id])
 
             # Update score
-            score_history[player.player_id][episode] = np.mean(score)
+            score_history[player.player_id][episode] = score
 
             # Update actor/critic loss
             actor_loss[player.player_id][episode] = np.mean(
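
Note on the reward terms: the reworked reward vector in entities/player.py calls two shaping helpers, fermi() and maxwell(), whose definitions are not included in this diff. The sketch below is only an assumed illustration of their likely shape; the call sites are taken from the patch, but the bodies, signatures, and the temperature constant are guesses, not the repository's actual implementation.

import numpy as np


def fermi(x, x0, temperature=10.0):
    # Fermi-Dirac-style squashing (assumed form): ~1 for x << x0,
    # 0.5 at x == x0, ~0 for x >> x0.  Under this form,
    # fermi(nearest_dist, 300) decays smoothly once the nearest enemy
    # is farther than roughly 300 distance units away.
    return 1.0 / (np.exp((x - x0) / temperature) + 1.0)


def maxwell(n, n_max):
    # Maxwell-Boltzmann-style bump (assumed form): 0 at n == 0, rising
    # monotonically to 1 at n == n_max, so the patch's
    # maxwell(len(enemies), max_num_enemies) - 1 term stays in [-1, 0].
    x = n / n_max
    return x * x * np.exp(1.0 - x * x)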