Widen the default layer sizes of the PPO actor and critic and deepen both
networks (one extra hidden layer for the actor, five for the critic).

Review fix: every hidden layer added by this patch is declared as
nn.Linear(fc2_dims, fc2_dims). Each of them consumes the fc2_dims-wide output
of the layer before it, so declaring the input width as fc1_dims only worked
while fc1_dims == fc2_dims and fails with a shape mismatch for any caller
that passes different widths.

NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy of this patch; confirm against the working tree
before applying, or apply with `git apply --ignore-whitespace`.
NOTE(review): the post-image blob id on the index line predates the fix
above and no longer matches the patched file content.
NOTE(review): the new defaults change parameter shapes, so checkpoints saved
with the previous 512-wide defaults presumably will not load - confirm.

diff --git a/ml/ppo/brain.py b/ml/ppo/brain.py
index dfe58bb..04e178e 100644
--- a/ml/ppo/brain.py
+++ b/ml/ppo/brain.py
@@ -52,7 +52,7 @@ class PPOMemory:
 
 
 class ActorNetwork(nn.Module):
-    def __init__(self, input_dim, output_dim, alpha, fc1_dims=512, fc2_dims=512, chkpt_dir='tmp/ppo'):
+    def __init__(self, input_dim, output_dim, alpha, fc1_dims=1024, fc2_dims=1024, chkpt_dir='tmp/ppo'):
         super(ActorNetwork, self).__init__()
 
         self.chkpt_dir = chkpt_dir
@@ -62,6 +62,8 @@ class ActorNetwork(nn.Module):
             nn.LeakyReLU(),
             nn.Linear(fc1_dims, fc2_dims),
             nn.LeakyReLU(),
+            nn.Linear(fc2_dims, fc2_dims),
+            nn.LeakyReLU(),
             nn.Linear(fc2_dims, output_dim),
             nn.Softmax(dim=-1)
         )
@@ -89,7 +91,7 @@ class ActorNetwork(nn.Module):
 
 
 class CriticNetwork(nn.Module):
-    def __init__(self, input_dims, alpha, fc1_dims=512, fc2_dims=512, chkpt_dir='tmp/ppo'):
+    def __init__(self, input_dims, alpha, fc1_dims=4096, fc2_dims=4096, chkpt_dir='tmp/ppo'):
         super(CriticNetwork, self).__init__()
 
         self.chkpt_dir = chkpt_dir
@@ -99,6 +101,16 @@ class CriticNetwork(nn.Module):
             nn.LeakyReLU(),
             nn.Linear(fc1_dims, fc2_dims),
             nn.LeakyReLU(),
+            nn.Linear(fc2_dims, fc2_dims),
+            nn.LeakyReLU(),
+            nn.Linear(fc2_dims, fc2_dims),
+            nn.LeakyReLU(),
+            nn.Linear(fc2_dims, fc2_dims),
+            nn.LeakyReLU(),
+            nn.Linear(fc2_dims, fc2_dims),
+            nn.LeakyReLU(),
+            nn.Linear(fc2_dims, fc2_dims),
+            nn.LeakyReLU(),
             nn.Linear(fc2_dims, 1)
         )
 