Updated state and reward features again

2023-12-07 21:36:39 +01:00 · 2023-12-07 21:36:39 +01:00 · 789bbf3997
commit 789bbf3997
parent 0b006b83a7
7 changed files with 41 additions and 19 deletions
--- a/assets/map/FloorBlocks.csv
+++ b/assets/map/FloorBlocks.csv
@ -47,4 +47,4 @@
 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,395,395,395,395,395,395,-1,-1,395,395,395,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,395,-1,-1,-1,-1,-1,-1
 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,395,395,395,395,-1,395,395,395,395,395,395,395,395,395,-1,395,395,-1,-1,-1,-1,-1,-1
 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,395,395,395,-1,-1,-1,-1,-1,-1,-1
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,600
--- a/entities/player.py
+++ b/entities/player.py
@ -19,6 +19,7 @@ class Player(pygame.sprite.Sprite):
                 player_id,
                 role,
                 position,
+                 map_edge,
                 groups,
                 obstacle_sprites,
                 visible_sprites,
@ -28,6 +29,7 @@ class Player(pygame.sprite.Sprite):
        super().__init__(groups)

        self.initial_position = position
+        self.map_edge = map_edge
        self.player_id = player_id
        self.distance_direction_from_enemy = None

@ -146,6 +148,14 @@ class Player(pygame.sprite.Sprite):

    def get_current_state(self):

+        def fermi(x, a):
+            # Logistic squashing of x into (0, 1); equals 0.5 at x == a
+            return 1 / (np.exp(-(x - a)) + 1)
+
+        def maxwell(x, a):
+            # Exponential decay exp(-(x - a) / a); equals 1 at x == a
+            return 1 / np.exp((x - a) / a)
+
        if self.distance_direction_from_enemy != []:
            sorted_distances = sorted(
                self.distance_direction_from_enemy, key=lambda x: x[0])
@ -159,26 +169,34 @@ class Player(pygame.sprite.Sprite):
        self.reward_features = [
            self.stats.exp,

-            10/nearest_dist if nearest_dist > 10 else 1,
+            fermi(nearest_dist, 10),

-            1/(np.exp((nearest_enemy.stats.health -
-               nearest_enemy.stats.monster_info['health'])
-                      / nearest_enemy.stats.monster_info['health'])) - 1,
+            fermi(
+                nearest_enemy.stats.health,
+                nearest_enemy.stats.monster_info['health']
+            ),

-            1/(np.exp((len(self.distance_direction_from_enemy) -
-               self.max_num_enemies)/self.max_num_enemies)) - 1,
+            maxwell(
+                len(self.distance_direction_from_enemy),
+                self.max_num_enemies
+            ) - 1,
+
+            - fermi(
+                self.stats.health,
+                self.stats.stats['health']
+            ),

-            1 - 1/(np.exp((self.stats.health -
-                   self.stats.stats['health'])/self.stats.stats['health']))
        ]

        self.state_features = [
-            np.exp(-self.animation.rect.center[0]),
-            np.exp(-self.animation.rect.center[1]),
+            self.animation.rect.center[0]/self.map_edge[0],
+            self.animation.rect.center[1]/self.map_edge[1],
            self._input.movement.direction.x,
            self._input.movement.direction.y,
            self.stats.health/self.stats.stats['health'],
-            self.stats.energy/self.stats.stats['energy']
+            self.stats.energy/self.stats.stats['energy'],
+            1 if 'attack' in self._input.status else 0,
+
        ]

        enemy_states = []
@ -186,15 +204,14 @@ class Player(pygame.sprite.Sprite):
        for distance, direction, enemy in self.distance_direction_from_enemy:
            enemy_states.extend([

-                10/distance if distance > 10 else 1,
+                fermi(distance, 10),

                direction[0],

                direction[1],

-                1/(np.exp((nearest_enemy.stats.health -
-                           nearest_enemy.stats.monster_info['health'])
-                          / nearest_enemy.stats.monster_info['health'])) - 1,
+                nearest_enemy.stats.health /
+                nearest_enemy.stats.monster_info['health'],

                enemy.stats.exp,
            ])
--- a/figures/actor_loss.png
+++ b/figures/actor_loss.png
--- a/figures/critic_loss.png
+++ b/figures/critic_loss.png
--- a/figures/score.png
+++ b/figures/score.png
--- a/figures/total_loss.png
+++ b/figures/total_loss.png
--- a/level.py
+++ b/level.py
@ -79,12 +79,16 @@ class Level:

                        # Generate unpassable terrain
                        if style == 'boundary':
-                            if col != '700':
+
+                            if col == '600':
+                                self.map_edge = (x, y)
+
+                            elif col != '700':
                                Terrain((x, y),
                                        [self.obstacle_sprites,
                                            self.visible_sprites],
                                        'invisible')
-                            if col == '700':
+                            elif col == '700' and self.n_players > 1:
                                print(f"Prison set at:{(x, y)}")
                        # Generate grass
                        if style == 'grass':
@ -145,6 +149,7 @@ class Level:
                player_id,
                'tank',
                choice(self.possible_player_locations),
+                self.map_edge,
                [self.visible_sprites],
                self.obstacle_sprites,
                self.visible_sprites,
@ -215,7 +220,7 @@ class Level:
            player.stats.energy\
                = player.stats.stats['energy']

-            # player.stats.exp = 0
+            player.stats.exp = 0

        self.get_entities()
        self.get_distance_direction()