1+ import random
12from collections import deque
2- from itertools import count , cycle , islice
3- from more_itertools import windowed
3+ from itertools import count
44
55import numpy as np
66from tqdm import tqdm
77
88from Config import Config
9+ from src .Game import Game
910from .Agent import Agent
1011from .policy import policy
11- from src .Game import Game
1212
1313
def self_play(
    agents: deque[Agent],
) -> tuple[list[tuple[np.ndarray, np.ndarray, int]], list[Agent]]:
    """Play one self-play game among a random selection of agents.

    ``Config.n_players`` agents are sampled without replacement from the
    pool and assigned to the game's player seats in random order.

    Args:
        agents: Pool of candidate agents; must contain at least
            ``Config.n_players`` entries for ``random.sample`` to succeed.

    Returns:
        A tuple ``(states, winners)`` where ``states`` is the list of
        ``(board_state, one_hot_action, won_flag)`` training examples
        collected by ``_perform_game`` and ``winners`` holds the winning
        agent of each game played (a single game here).
    """
    states: list[tuple[np.ndarray, np.ndarray, int]] = []
    winners: list[Agent] = []
    game = Game(n_players=Config.n_players)
    # Inference only: switch off training-specific behavior (e.g. dropout).
    for agent in agents:
        agent.eval()
    # Seat a random subset of the agent pool: map each game player id to
    # the agent controlling that seat.
    id_to_agent = {
        player.id: agent
        for agent, player in zip(
            random.sample(agents, Config.n_players), game.players
        )
    }
    results, winner = _perform_game(game, [], id_to_agent)
    states += results
    winners.append(winner)
    return states, winners
3329
3430
@@ -49,10 +45,10 @@ def _perform_game(
4945 list (
5046 (
5147 state [0 ].get_state (),
52- ( onehot_encoded_action := np .zeros (Config .n_actions ), onehot_encoded_action . __setitem__ ( game .all_moves .index (state [1 ]), 1 ))[ 0 ],
48+ np .eye (Config .n_actions )[ game .all_moves .index (state [1 ])],
5349 int (result [state [0 ].current_player .id ] == 1 ),
5450 )
55- for state in states
51+ for state in states if state [ 1 ] != game . null_move
5652 ),
5753 id_to_agent [
5854 next (player .id for player in game .players if result [player .id ])
0 commit comments