
Commit 6340e62

Merge branch 'refs/heads/feature/evaluation'

2 parents: c4c9a24 + 0d2f479

4 files changed: 27 additions & 11 deletions

File tree:
  Config.py
  agent/policy.py
  agent/self_play.py
  main.py
Config.py

Lines changed: 3 additions & 3 deletions
@@ -22,16 +22,16 @@ class _ConfigAgent:
     #     64,
     #     32,
     # )
-    # hidden_sizes = (256,)
-    hidden_sizes = tuple()
+    hidden_sizes = (256,)
+    # hidden_sizes = tuple()
     c = 0.2
     learning_rate = 1e-5
     debug = False
     pretrain = True


 class Config(_ConfigPaths, _ConfigAgent):
-    train = False
+    train = True
     max_results_held = 100
     minimal_relative_agent_improvement = 1.1
     min_games_to_replace_agents = 40
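
This hunk switches the agent from no hidden layers (hidden_sizes = tuple(), i.e. a purely linear model) to a single 256-unit hidden layer, and turns training on (train = True). The model-building code is not part of this commit; the following is only a hypothetical sketch of how a hidden_sizes tuple is commonly unrolled into an MLP (build_mlp and the PyTorch dependency are assumptions, not this repo's code):

import torch.nn as nn

def build_mlp(n_inputs, n_outputs, hidden_sizes=(256,)):
    # Hypothetical sketch: one Linear+ReLU pair per entry in hidden_sizes.
    # hidden_sizes = tuple() degenerates to a single Linear layer.
    layers, last = [], n_inputs
    for size in hidden_sizes:
        layers += [nn.Linear(last, size), nn.ReLU()]
        last = size
    layers.append(nn.Linear(last, n_outputs))
    return nn.Sequential(*layers)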

agent/policy.py

Lines changed: 1 addition & 1 deletion
@@ -21,5 +21,5 @@ def policy(
     all_moves = game.get_possible_actions()
     for _ in range(n_simulations):
         search(game.copy(), agent, c, N, visited, P, Q)
-    pi = [N[initial_state][a] for a in all_moves]
+    pi = np.array([N[initial_state][a] for a in all_moves])
     return pi, all_moves[np.argmax(pi)]
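
Wrapping the visit counts in np.array (rather than returning a plain list) lets callers do elementwise arithmetic on pi — the agent/self_play.py hunk below divides by pi.sum() to normalise it. A minimal illustration with made-up counts:

import numpy as np

visit_counts = [3, 7, 0, 10]  # N[initial_state][a] for each legal move
# visit_counts / sum(visit_counts)   # TypeError: a list has no elementwise division
pi = np.array(visit_counts)
print(pi / pi.sum())          # -> [0.15 0.35 0.   0.5 ]
print(int(np.argmax(pi)))     # index of the most-visited move -> 3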

agent/self_play.py

Lines changed: 2 additions & 5 deletions
@@ -35,18 +35,15 @@ def _perform_game(
     for turn in tqdm(count()):
         agent = id_to_agent[game.current_player.id]
         pi, action = policy(game, agent, Config.c, Config.n_simulations)
-        action_index = game.all_moves.index(action)
-        onehot_encoded_action = np.zeros(Config.n_actions)
-        onehot_encoded_action[action_index] = 1
-        states.append((game, action, 0))
+        states.append((game, pi / pi.sum(), 0))
         game = game.perform(action)
         if game.is_terminal():
             result = game.get_results()
             return (
                 list(
                     (
                         state[0].get_state(),
-                        np.eye(Config.n_actions)[game.all_moves.index(state[1])],
+                        state[1],
                         int(result[state[0].current_player.id] == 1),
                     )
                     for state in states
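
Self-play now stores the normalised visit distribution pi / pi.sum() as the policy target instead of one-hot encoding the single chosen action, the AlphaZero-style choice: the soft target keeps the relative strength of every legal move, so each position carries more training signal than a single hard label. Side by side, with made-up counts:

import numpy as np

n_actions = 4
pi = np.array([3, 7, 0, 10])  # visit counts returned by policy()

# Old target: one-hot vector for the action actually played (the argmax).
onehot = np.eye(n_actions)[int(np.argmax(pi))]   # -> [0. 0. 0. 1.]

# New target: the whole search distribution.
soft = pi / pi.sum()                             # -> [0.15 0.35 0.   0.5 ]

Note that pi is indexed over the position's legal moves rather than the full Config.n_actions action set, so after this change the length of each stored target follows len(all_moves).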

main.py

Lines changed: 21 additions & 2 deletions
@@ -92,8 +92,27 @@ def evaluation():
         bce, cce = eval_agent(agent, eval_set)
         if bce >= prev_bce and cce >= prev_bce:
             break
-        prev_bce = bce
-        prev_cce = cce
+        prev_bce = min(prev_bce, bce)
+        prev_cce = min(prev_cce, cce)
+
+
+# def evaluation():
+#     v_agent = LogisticRegression()
+#     p_agent = LogisticRegression()
+#     train_set = reduce(
+#         operator.add,
+#         (eval(path.read_text()) for path in Config.training_data_path.iterdir()),
+#     )
+#     eval_set = reduce(
+#         operator.add,
+#         (eval(path.read_text()) for path in Config.evaluation_data_path.iterdir()),
+#     )
+#     v_agent.fit(tuple(sample[0] for sample in train_set), tuple(sample[2] for sample in train_set))
+#     p_agent.fit(tuple(sample[0] for sample in train_set), np.argmax(np.array(tuple(sample[1] for sample in train_set)), axis=1))
+#     print(
+#         v_agent.score(tuple(sample[0] for sample in eval_set), tuple(sample[2] for sample in eval_set)),
+#         p_agent.score(tuple(sample[0] for sample in eval_set), np.argmax(np.array(tuple(sample[1] for sample in eval_set)), axis=1)),
+#     )


 def main():
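
The evaluation loop now tracks the best loss seen so far (min of the previous best and the current value) instead of the immediately preceding one, so a single noisy uptick no longer resets the baseline: the loop breaks only once neither loss beats its best value. The unchanged context line compares cce against prev_bce, which looks like a typo for prev_cce. The commented-out evaluation() variant fits sklearn LogisticRegression baselines for the value and policy heads, apparently kept as a sanity-check reference. A hedged sketch of the stopping pattern as presumably intended (train_step and evaluate are hypothetical placeholders, not functions from this repo):

import math

def fit_until_stalled(train_step, evaluate):
    # train_step/evaluate stand in for the repo's actual loop body and
    # its eval_agent(agent, eval_set) call.
    prev_bce = prev_cce = math.inf
    while True:
        train_step()
        bce, cce = evaluate()
        if bce >= prev_bce and cce >= prev_cce:
            break                          # neither loss improved on its best
        prev_bce = min(prev_bce, bce)      # best-so-far, not last-seen
        prev_cce = min(prev_cce, cce)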
