Massive improvement
@@ -1,13 +1,15 @@
 import numpy as np
 import torch as T

+from tqdm import tqdm
+
 from .brain import ActorNetwork, CriticNetwork, PPOMemory


 class Agent:

     def __init__(self, input_dims, n_actions, gamma=0.99, alpha=0.0003,
-                 policy_clip=0.2, batch_size=64, N=2048, n_epochs=10,
+                 policy_clip=0.2, batch_size=64, n_epochs=10,
                  gae_lambda=0.95, entropy_coef=0.001, chkpt_dir='tmp/ppo'):

         self.gamma = gamma
@@ -50,7 +52,12 @@ class Agent:
         return action, probs, value

     def learn(self):
-        for _ in range(self.n_epochs):
+        for _ in tqdm(range(self.n_epochs),
+                      desc='Learning...',
+                      dynamic_ncols=True,
+                      leave=False,
+                      ascii=True):
+
             state_arr, action_arr, old_probs_arr, vals_arr, reward_arr, dones_arr, batches = self.memory.generate_batches()

             values = vals_arr
@@ -102,11 +109,11 @@ class Agent:
                 self.critic.optimizer.zero_grad()
                 self.total_loss.backward()

-                # T.nn.utils.clip_grad_norm_(
-                #     self.actor.parameters(), max_norm=2)
-                #
-                # T.nn.utils.clip_grad_norm_(
-                #     self.critic.parameters(), max_norm=2)
+                T.nn.utils.clip_grad_norm_(
+                    self.actor.parameters(), max_norm=2)
+
+                T.nn.utils.clip_grad_norm_(
+                    self.critic.parameters(), max_norm=2)
                 #
                 # # Calculate the gradient norms for both networks
                 # actor_grad_norm = T.nn.utils.clip_grad_norm_(
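For context, the newly enabled calls follow the standard PyTorch pattern: clip the gradient norm after backward() and before the optimizer step. A minimal, self-contained sketch with a placeholder model and data (not this repo's networks):

import torch
import torch.nn as nn

# Placeholder model and batch, only to show where clip_grad_norm_ sits.
model = nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
loss = model(torch.randn(8, 4)).pow(2).mean()

optimizer.zero_grad()
loss.backward()
# Rescales all gradients if their combined L2 norm exceeds max_norm=2.
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=2)
optimizer.step()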
@@ -59,9 +59,9 @@ class ActorNetwork(nn.Module):

         self.actor = nn.Sequential(
             nn.Linear(input_dim, fc1_dims),
-            nn.ReLU(),
+            nn.LeakyReLU(),
             nn.Linear(fc1_dims, fc2_dims),
-            nn.ReLU(),
+            nn.LeakyReLU(),
             nn.Linear(fc2_dims, output_dim),
             nn.Softmax(dim=-1)
         )
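For reference, nn.LeakyReLU keeps a small negative slope (0.01 by default) instead of zeroing negative activations the way nn.ReLU does, which helps avoid dead units. A quick standalone comparison:

import torch
import torch.nn as nn

x = torch.tensor([-2.0, -0.5, 0.0, 1.5])
print(nn.ReLU()(x))       # tensor([0.0000, 0.0000, 0.0000, 1.5000])
print(nn.LeakyReLU()(x))  # tensor([-0.0200, -0.0050, 0.0000, 1.5000]), slope 0.01 on the negative side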
@@ -15,11 +15,6 @@ class Camera(pygame.sprite.Group):
         self.half_height = self.display_surface.get_size()[1] // 2
         self.offset = pygame.math.Vector2(100, 200)

-        # Creating the floor
-        image_path = import_assets(os.path.join('graphics',
-                                                'tilemap',
-                                                'ground.png'))
-
         self.floor_surf = pygame.image.load(
             import_assets(
                 os.path.join('graphics',
@@ -30,7 +30,7 @@ monster_data = {
                'notice_radius': 350},

    'bamboo': {'id': 4,
-              'health': 70,
+              'health': 50,
               'exp': 9,
               'attack': 20,
               'attack_type': 'leaf_attack',
@@ -1,10 +1,10 @@
 tank_stats = {
     'role_id': 1,
     'health': 150,
-    'energy': 40,
+    'energy': 70,
     'attack': 10,
-    'magic': 3,
-    'speed': 3
+    'magic': 5,
+    'speed': 5
 }

 mage_stats = {
@@ -57,7 +57,6 @@ class Player(pygame.sprite.Sprite):
                     alpha,
                     policy_clip,
                     batch_size,
-                    N,
                     n_epochs,
                     gae_lambda,
                     chkpt_dir,
@@ -75,7 +74,6 @@ class Player(pygame.sprite.Sprite):
             alpha=alpha,
             policy_clip=policy_clip,
             batch_size=batch_size,
-            N=N,
             n_epochs=n_epochs,
             gae_lambda=gae_lambda,
             entropy_coef=entropy_coef,
@@ -168,32 +166,32 @@ class Player(pygame.sprite.Sprite):

         self.action_features = [self._input.action]

-        # self.reward = [
-        #     np.log(1 + self.stats.exp),
-        #
-        #     fermi(nearest_dist, 50),
-        #
-        #     fermi(
-        #         nearest_enemy.stats.health,
-        #         nearest_enemy.stats.monster_info['health']
-        #     ),
-        #
-        #     maxwell(
-        #         len(self.distance_direction_from_enemy),
-        #         self.max_num_enemies
-        #     ) - 1,
-        #
-        #     - fermi(
-        #         self.stats.health,
-        #         self.stats.stats['health']
-        #     ),
-        # ]
-
-        self.reward = self.stats.exp\
-            + self.stats.health/self.stats.stats['health'] - 1\
-            - nearest_dist/np.sqrt(np.sum(self.map_edge))\
-            - nearest_enemy.stats.health/nearest_enemy.stats.monster_info['health']\
-            - len(self.distance_direction_from_enemy)/self.max_num_enemies
+        self.reward = [
+            np.log(1 + self.stats.exp) if self.stats.exp >= 0 else -10,
+
+            fermi(nearest_dist, 300),
+
+            fermi(
+                nearest_enemy.stats.health,
+                nearest_enemy.stats.monster_info['health']
+            ),
+
+            maxwell(
+                len(self.distance_direction_from_enemy),
+                self.max_num_enemies
+            ) - 1,
+
+            - fermi(
+                self.stats.health,
+                self.stats.stats['health']
+            )
+        ]
+
+        # self.reward = self.stats.exp\
+        #     + self.stats.health/self.stats.stats['health'] - 1\
+        #     - nearest_dist/np.sqrt(np.sum(self.map_edge))\
+        #     - nearest_enemy.stats.health/nearest_enemy.stats.monster_info['health']\
+        #     - 2*len(self.distance_direction_from_enemy)/self.max_num_enemies

         self.state_features = [
             self.animation.rect.center[0]/self.map_edge[0],
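The single scalar reward expression is replaced by a list of bounded shaping terms. The fermi and maxwell helpers are defined elsewhere in the repo and are not shown in this diff; the sketch below uses a hypothetical Fermi-Dirac-style squash only to illustrate the idea, together with the per-step mean reduction that pneuma.py later applies to player.reward:

import numpy as np

# Hypothetical stand-in: the repo's fermi/maxwell helpers are not shown in this diff.
# A Fermi-Dirac-style squash maps a raw quantity into (0, 1) around a chosen scale.
def fermi(x, scale, width=None):
    width = width if width is not None else scale / 10.0
    return 1.0 / (1.0 + np.exp((x - scale) / width))

# The reward is kept as a list of bounded terms; the training loop reduces it per step.
reward_vector = [
    np.log(1 + 35),         # e.g. experience term
    fermi(120.0, 300.0),    # e.g. distance to the nearest enemy
    -0.2,                   # placeholder for the remaining terms
]
step_reward = np.mean(reward_vector)
print(step_reward)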
Binary image diffs (4 plots): 13 KiB → 15 KiB, 13 KiB → 14 KiB, 15 KiB → 19 KiB, 13 KiB → 15 KiB.
pneuma.py

@@ -75,7 +75,7 @@ if __name__ == "__main__":

    parser.add_argument('--entropy',
                        type=float,
-                       default=0.001,
+                       default=0.01,
                        help="The entropy coefficient")

    parser.add_argument('--alpha',
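The default entropy coefficient goes from 0.001 to 0.01, i.e. a stronger exploration bonus. As a point of reference, in a typical PPO objective this coefficient scales an entropy term subtracted from the combined loss; the sketch below assumes that form with placeholder values (the exact combination inside Agent.learn() is not part of this diff):

import torch
from torch.distributions import Categorical

entropy_coef = 0.01

logits = torch.randn(64, 5)         # placeholder batch of action logits
dist = Categorical(logits=logits)
entropy = dist.entropy().mean()     # higher entropy = more exploratory policy

actor_loss = torch.tensor(0.2)      # placeholder surrogate losses
critic_loss = torch.tensor(0.5)

# Larger entropy_coef pushes the optimizer toward higher-entropy policies.
total_loss = actor_loss + 0.5 * critic_loss - entropy_coef * entropy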
@@ -139,13 +139,13 @@ if __name__ == "__main__":
     game = Game(show_pg=show_pygame, n_players=n_players)

     print("Initializing agents ...")
-    for player in game.level.player_sprites:
+    for player in tqdm(game.level.player_sprites,
+                       dynamic_ncols=True):
         player.setup_agent(
             gamma=args.gamma,
             alpha=args.alpha,
             policy_clip=args.policy_clip,
             batch_size=args.batch_size,
-            N=args.horizon,
             n_epochs=args.n_epochs,
             gae_lambda=args.gae_lambda,
             entropy_coef=args.entropy,
@@ -157,9 +157,11 @@ if __name__ == "__main__":
     for episode in tqdm(range(n_episodes),
                         dynamic_ncols=True):

-        # This handles agent continuity, as well as score persistence
         game.level.reset()

+        episode_reward = np.zeros(
+            shape=(n_players, episode_length))
+
         episode_actor_loss = np.zeros(
             shape=(n_players, learnings_per_episode))

@@ -177,8 +179,13 @@ if __name__ == "__main__":

             if not game.level.done:
                 game.run()
-                if step % horizon == 0:
-                    for player in game.level.player_sprites:
-                        player.agent.learn()
+                for player in game.level.player_sprites:
+
+                    episode_reward[player.player_id][step] = np.mean(
+                        player.reward)
+
+                    if (step % horizon == 0 and step != 0) or player.is_dead():
+                        player.agent.learn()

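The update trigger now sits inside the per-player loop: rewards are logged every step, and Agent.learn() runs every horizon steps or when the player dies. A toy sketch of that cadence with placeholder names (not the repo's objects), showing why the step != 0 guard matters:

# Minimal sketch of the update cadence; names here are placeholders.
horizon = 4
episode_length = 12
player_is_dead = False

def run_ppo_update(step):
    print(f"update at step {step}")

for step in range(episode_length):
    # ... one environment step / transition collected here ...
    if (step % horizon == 0 and step != 0) or player_is_dead:
        # The `step != 0` guard skips the very first step, when the
        # rollout buffer would still be empty.
        run_ppo_update(step)   # fires at steps 4 and 8 in this toy run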
@@ -196,10 +203,10 @@ if __name__ == "__main__":
         # Gather information about the episode
         for player in game.level.player_sprites:

-            score = player.reward
+            score = np.mean(episode_reward[player.player_id])

             # Update score
-            score_history[player.player_id][episode] = np.mean(score)
+            score_history[player.player_id][episode] = score

             # Update actor/critic loss
             actor_loss[player.player_id][episode] = np.mean(