diff --git a/agents/ppo/agent.py b/agents/ppo/agent.py
index c8aeade..0f28d7d 100644
--- a/agents/ppo/agent.py
+++ b/agents/ppo/agent.py
@@ -1,13 +1,15 @@
 import numpy as np
 import torch as T
+from tqdm import tqdm
+
 from .brain import ActorNetwork, CriticNetwork, PPOMemory
 
 
 class Agent:
     def __init__(self, input_dims, n_actions, gamma=0.99, alpha=0.0003,
-                 policy_clip=0.2, batch_size=64, N=2048, n_epochs=10,
+                 policy_clip=0.2, batch_size=64, n_epochs=10,
                  gae_lambda=0.95, entropy_coef=0.001, chkpt_dir='tmp/ppo'):
 
         self.gamma = gamma
@@ -50,7 +52,12 @@ class Agent:
         return action, probs, value
 
     def learn(self):
-        for _ in range(self.n_epochs):
+        for _ in tqdm(range(self.n_epochs),
+                      desc='Learning...',
+                      dynamic_ncols=True,
+                      leave=False,
+                      ascii=True):
+
             state_arr, action_arr, old_probs_arr, vals_arr, reward_arr, dones_arr, batches = self.memory.generate_batches()
 
             values = vals_arr
@@ -102,11 +109,11 @@ class Agent:
             self.critic.optimizer.zero_grad()
             self.total_loss.backward()
 
-            # T.nn.utils.clip_grad_norm_(
-            #     self.actor.parameters(), max_norm=2)
-            #
-            # T.nn.utils.clip_grad_norm_(
-            #     self.critic.parameters(), max_norm=2)
+            T.nn.utils.clip_grad_norm_(
+                self.actor.parameters(), max_norm=2)
+
+            T.nn.utils.clip_grad_norm_(
+                self.critic.parameters(), max_norm=2)
             #
             # # Calculate the gradient norms for both networks
             # actor_grad_norm = T.nn.utils.clip_grad_norm_(
diff --git a/agents/ppo/brain.py b/agents/ppo/brain.py
index 6dc02b9..3e52a99 100644
--- a/agents/ppo/brain.py
+++ b/agents/ppo/brain.py
@@ -59,9 +59,9 @@ class ActorNetwork(nn.Module):
 
         self.actor = nn.Sequential(
             nn.Linear(input_dim, fc1_dims),
-            nn.ReLU(),
+            nn.LeakyReLU(),
             nn.Linear(fc1_dims, fc2_dims),
-            nn.ReLU(),
+            nn.LeakyReLU(),
             nn.Linear(fc2_dims, output_dim),
             nn.Softmax(dim=-1)
         )
diff --git a/camera.py b/camera.py
index 12ee9c5..842fe1b 100644
--- a/camera.py
+++ b/camera.py
@@ -15,11 +15,6 @@ class Camera(pygame.sprite.Group):
         self.half_height = self.display_surface.get_size()[1] // 2
         self.offset = pygame.math.Vector2(100, 200)
 
-        # Creating the floor
-        image_path = import_assets(os.path.join('graphics',
-                                                'tilemap',
-                                                'ground.png'))
-
         self.floor_surf = pygame.image.load(
             import_assets(
                 os.path.join('graphics',
diff --git a/configs/game/monster_config.py b/configs/game/monster_config.py
index cf1c451..14d5344 100644
--- a/configs/game/monster_config.py
+++ b/configs/game/monster_config.py
@@ -30,7 +30,7 @@ monster_data = {
                'notice_radius': 350},
 
     'bamboo': {'id': 4,
-               'health': 70,
+               'health': 50,
               'exp': 9,
               'attack': 20,
               'attack_type': 'leaf_attack',
diff --git a/configs/game/player_config.py b/configs/game/player_config.py
index a3471a9..940e576 100644
--- a/configs/game/player_config.py
+++ b/configs/game/player_config.py
@@ -1,10 +1,10 @@
 tank_stats = {
     'role_id': 1,
     'health': 150,
-    'energy': 40,
+    'energy': 70,
     'attack': 10,
-    'magic': 3,
-    'speed': 3
+    'magic': 5,
+    'speed': 5
 }
 
 mage_stats = {
diff --git a/entities/player.py b/entities/player.py
index ca2641e..1260052 100644
--- a/entities/player.py
+++ b/entities/player.py
@@ -57,7 +57,6 @@ class Player(pygame.sprite.Sprite):
                     alpha,
                     policy_clip,
                     batch_size,
-                    N,
                     n_epochs,
                     gae_lambda,
                     chkpt_dir,
@@ -75,7 +74,6 @@ class Player(pygame.sprite.Sprite):
             alpha=alpha,
             policy_clip=policy_clip,
             batch_size=batch_size,
-            N=N,
             n_epochs=n_epochs,
             gae_lambda=gae_lambda,
             entropy_coef=entropy_coef,
@@ -168,32 +166,32 @@ class Player(pygame.sprite.Sprite):
 
         self.action_features = [self._input.action]
 
-        # self.reward = [
-        #     np.log(1 + self.stats.exp),
-        #
-        #     fermi(nearest_dist, 50),
-        #
-        #     - fermi(
-        #         nearest_enemy.stats.health,
-        #         nearest_enemy.stats.monster_info['health']
-        #     ),
-        #
-        #     maxwell(
-        #         len(self.distance_direction_from_enemy),
-        #         self.max_num_enemies
-        #     ) - 1,
-        #
-        #     - fermi(
-        #         self.stats.health,
-        #         self.stats.stats['health']
-        #     ),
-        # ]
+        self.reward = [
+            np.log(1 + self.stats.exp) if self.stats.exp >= 0 else -10,
 
-        self.reward = self.stats.exp\
-            + self.stats.health/self.stats.stats['health'] - 1\
-            - nearest_dist/np.sqrt(np.sum(self.map_edge))\
-            - nearest_enemy.stats.health/nearest_enemy.stats.monster_info['health']\
-            - len(self.distance_direction_from_enemy)/self.max_num_enemies
+            fermi(nearest_dist, 300),
+
+            fermi(
+                nearest_enemy.stats.health,
+                nearest_enemy.stats.monster_info['health']
+            ),
+
+            maxwell(
+                len(self.distance_direction_from_enemy),
+                self.max_num_enemies
+            ) - 1,
+
+            - fermi(
+                self.stats.health,
+                self.stats.stats['health']
+            )
+        ]
+
+        # self.reward = self.stats.exp\
+        #     + self.stats.health/self.stats.stats['health'] - 1\
+        #     - nearest_dist/np.sqrt(np.sum(self.map_edge))\
+        #     - nearest_enemy.stats.health/nearest_enemy.stats.monster_info['health']\
+        #     - 2*len(self.distance_direction_from_enemy)/self.max_num_enemies
 
         self.state_features = [
             self.animation.rect.center[0]/self.map_edge[0],
diff --git a/figures/actor_loss.png b/figures/actor_loss.png
index 031ad58..bf947fd 100644
Binary files a/figures/actor_loss.png and b/figures/actor_loss.png differ
diff --git a/figures/critic_loss.png b/figures/critic_loss.png
index c6cac6b..19147f6 100644
Binary files a/figures/critic_loss.png and b/figures/critic_loss.png differ
diff --git a/figures/score.png b/figures/score.png
index f025931..5fb8a5f 100644
Binary files a/figures/score.png and b/figures/score.png differ
diff --git a/figures/total_loss.png b/figures/total_loss.png
index 2ff68c3..766f492 100644
Binary files a/figures/total_loss.png and b/figures/total_loss.png differ
diff --git a/pneuma.py b/pneuma.py
index 2f2db6e..49bab8d 100644
--- a/pneuma.py
+++ b/pneuma.py
@@ -75,7 +75,7 @@ if __name__ == "__main__":
 
     parser.add_argument('--entropy',
                         type=float,
-                        default=0.001,
+                        default=0.01,
                         help="The entropy coefficient")
 
     parser.add_argument('--alpha',
@@ -139,13 +139,13 @@ if __name__ == "__main__":
     game = Game(show_pg=show_pygame, n_players=n_players)
 
     print("Initializing agents ...")
-    for player in game.level.player_sprites:
+    for player in tqdm(game.level.player_sprites,
+                       dynamic_ncols=True):
         player.setup_agent(
             gamma=args.gamma,
             alpha=args.alpha,
             policy_clip=args.policy_clip,
             batch_size=args.batch_size,
-            N=args.horizon,
             n_epochs=args.n_epochs,
             gae_lambda=args.gae_lambda,
             entropy_coef=args.entropy,
@@ -157,9 +157,11 @@ if __name__ == "__main__":
 
     for episode in tqdm(range(n_episodes), dynamic_ncols=True):
 
-        # This handles agent continuity, as well as score persistence
         game.level.reset()
 
+        episode_reward = np.zeros(
+            shape=(n_players, episode_length))
+
         episode_actor_loss = np.zeros(
             shape=(n_players, learnings_per_episode))
 
@@ -177,8 +179,13 @@ if __name__ == "__main__":
             if not game.level.done:
                 game.run()
 
-                if step % horizon == 0:
-                    for player in game.level.player_sprites:
+
+                for player in game.level.player_sprites:
+
+                    episode_reward[player.player_id][step] = np.mean(
+                        player.reward)
+
+                    if (step % horizon == 0 and step != 0) or player.is_dead():
 
                         player.agent.learn()
 
@@ -196,10 +203,10 @@ if __name__ == "__main__":
 
         # Gather information about the episode
        for player in game.level.player_sprites:
-            score = player.reward
+            score = np.mean(episode_reward[player.player_id])
 
             # Update score
-            score_history[player.player_id][episode] = np.mean(score)
+            score_history[player.player_id][episode] = score
 
             # Update actor/critic loss
             actor_loss[player.player_id][episode] = np.mean(
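
Note on the reward terms: the reworked reward vector in entities/player.py calls two shaping helpers, fermi() and maxwell(), whose definitions are not included in this diff. The sketch below is only an assumed illustration of their likely shape; the call sites are taken from the patch, but the bodies, signatures, and the temperature constant are guesses, not the repository's actual implementation.

import numpy as np


def fermi(x, x0, temperature=10.0):
    # Fermi-Dirac-style squashing (assumed form): ~1 for x << x0,
    # 0.5 at x == x0, ~0 for x >> x0.  Under this form,
    # fermi(nearest_dist, 300) decays smoothly once the nearest enemy
    # is farther than roughly 300 distance units away.
    return 1.0 / (np.exp((x - x0) / temperature) + 1.0)


def maxwell(n, n_max):
    # Maxwell-Boltzmann-style bump (assumed form): 0 at n == 0, rising
    # monotonically to 1 at n == n_max, so the patch's
    # maxwell(len(enemies), max_num_enemies) - 1 term stays in [-1, 0].
    x = n / n_max
    return x * x * np.exp(1.0 - x * x)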