Commit e8cd14d

It is running but is it slow...
1 parent: f30cb8f

5 files changed: 29 additions & 18 deletions

Config.py

Lines changed: 5 additions & 0 deletions
@@ -18,6 +18,11 @@ class _ConfigPaths:
 
 
 class Config(_ConfigPaths):
+    # hidden_sizes = (256, 128, 64, 32)
+    hidden_sizes = (256,)
+    # hidden_sizes = tuple()
+    learning_rate = 1e-3
+    max_results_held = 100
     minimal_relative_agent_improvement = 1.1
     min_games_to_replace_agents = 20
     train_batch_size = 64
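
The commented-out alternatives around the live value suggest the layer stack is being tuned for speed; (256,) leaves a single hidden layer. Since Agent (below) builds its Linear layers from consecutive size pairs, a quick sketch of what this value expands to, with 512 as a placeholder for the real input size from Agent._input_size_dictionary:

from itertools import pairwise
from torch import nn

hidden_sizes = (256,)  # the new Config value; 512 below is a placeholder input size
sizes = (512, *hidden_sizes)
layers = [nn.Linear(n_in, n_out) for n_in, n_out in pairwise(sizes)]
print(layers)  # [Linear(in_features=512, out_features=256, bias=True)]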

agent/Agent.py

Lines changed: 13 additions & 8 deletions
@@ -1,7 +1,10 @@
 from itertools import pairwise, starmap
 
+import numpy as np
 from torch import nn, Tensor
 
+from Config import Config
+
 
 class Agent(nn.Module):
     _input_size_dictionary = {
@@ -11,27 +14,29 @@ class Agent(nn.Module):
     def __init__(
         self,
         n_players: int,
-        hidden_sizes: tuple = (256, 128, 64, 32),
+        hidden_sizes: tuple = Config.hidden_sizes,
         n_moves: int = 46,
     ):
         super().__init__()
-        self.relu = nn.ReLU()
         self.tanh = nn.Tanh()
         self.softmax = nn.Softmax(dim=1)
         first_size = self._get_size(n_players)
         sizes = first_size, *hidden_sizes
-        self.layers = tuple(starmap(nn.Linear, pairwise(sizes)))
-        for index, layer in enumerate(self.layers):
-            setattr(self, f"layer_{index}", layer)
-        self.fc_v = nn.Linear(hidden_sizes[-1], 1)
-        self.fc_p = nn.Linear(hidden_sizes[-1], n_moves)
+        self.layers = nn.ModuleList(starmap(nn.Linear, pairwise(sizes)))
+        self.trained = False
+        self.fc_v = nn.Linear(sizes[-1], 1)
+        self.fc_p = nn.Linear(sizes[-1], n_moves)
         self._n_moves = n_moves
 
     def _get_size(self, n_players: int) -> int:
         return self._input_size_dictionary[n_players]
 
     def forward(self, state: Tensor):
+        if not self.training and not self.trained:
+            return self.softmax(Tensor(np.random.random((1, self._n_moves)))), Tensor(
+                np.random.uniform(-1, 1, (1, 1))
+            )
+        self.trained = True
         for layer in self.layers:
             state = layer(state)
-            state = self.relu(state)
         return self.softmax(self.fc_p(state)), self.tanh(self.fc_v(state))
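
The core of the change: the tuple-plus-setattr bookkeeping is replaced by nn.ModuleList, the idiomatic container for a variable number of sub-layers, and forward now short-circuits an untrained agent in eval mode to a random policy/value pair. Note that with self.relu gone, everything before the softmax/tanh heads is now affine. For reference, a minimal sketch (toy modules, not the project's Agent) of why a container like nn.ModuleList matters: submodules held in a plain tuple are invisible to parameters(), which is what the optimizer consumes:

from torch import nn

class TupleOnly(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = (nn.Linear(4, 4),)  # plain tuple: not registered as submodules

class WithModuleList(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(4, 4)])  # registered as submodules

print(sum(p.numel() for p in TupleOnly().parameters()))       # 0
print(sum(p.numel() for p in WithModuleList().parameters()))  # 20 (4*4 weights + 4 biases)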

agent/train_agent.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ def train_agent(agent: Agent, train_data: deque[tuple[tuple, np.array, int]]):
     agent.train()
     categorical_cross_entropy = nn.CrossEntropyLoss()
     binary_cross_entropy = nn.BCELoss()
-    optimizer = optim.Adam(agent.parameters())
+    optimizer = optim.Adam(agent.parameters(), lr=Config.learning_rate)
     dataset = RLDataset(train_data)
     loader = DataLoader(dataset, batch_size=Config.train_batch_size)
     for batch in loader:
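
Minor but worth noting: PyTorch's Adam already defaults to lr=1e-3, so with Config.learning_rate = 1e-3 this change is numerically a no-op; the point is that the rate now lives in Config with the other knobs. A toy check:

from torch import nn, optim

optimizer = optim.Adam(nn.Linear(4, 1).parameters(), lr=1e-3)  # same as the built-in default
print(optimizer.defaults["lr"])  # 0.001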

main.py

Lines changed: 9 additions & 8 deletions
@@ -14,15 +14,16 @@
 def main():
     training_buffer = deque(maxlen=Config.training_buffer_len)
     agents = deque((Agent(Config.n_players) for _ in range(Config.n_players)), maxlen=Config.n_players)
-    agent_scores = defaultdict(int, ((id(agent), 0) for agent in agents))
+    scores = deque(maxlen=Config.max_results_held)
     for _ in (count() if Config.n_games is None else range(Config.n_games)):
-        buffer, agent = self_play(agents)
-        agent_scores[id(agent)] += 1
-        if sum(agent_scores.values()) >= Config.min_games_to_replace_agents and agent_scores[id(agents[-1])] > Config.minimal_relative_agent_improvement * sum(agent_scores.values()) / len(agents):
-            torch.save(agent[-1].state_dict(), Config.model_path.joinpath(str(max(map(int, (*re.findall(r'\d+', ''.join(Config.model_path.iterdir())), -1))) + 1) + ".pth"))
-            agents.append(Agent(Config.n_players).load_state_dict(deepcopy(agent[-1].state_dict())))
-            agent_scores = defaultdict(int, ((id(agent), 0) for agent in agents))
-        print(agent_scores[id(agents[-1])], sum(agent_scores.values()), agent_scores[id(agents[-1])] / sum(agent_scores.values()))
+        buffer, winner = self_play(agents)
+        scores.append(agents[-1] is winner)
+        if len(scores) >= Config.min_games_to_replace_agents and sum(scores) > Config.minimal_relative_agent_improvement * len(scores) / len(agents):
+            torch.save(agents[-1].state_dict(), Config.model_path.joinpath(str(max(map(int, (*re.findall(r'\d+', ''.join(Config.model_path.iterdir())), -1))) + 1) + ".pth"))
+            agents.append(Agent(Config.n_players).load_state_dict(deepcopy(agents[-1].state_dict())))
+            agents[-1].training = True
+            scores = deque(maxlen=Config.max_results_held)
+        print(sum(scores) / len(scores), len(scores))
     training_buffer += buffer
     train_agent(agents[-1], training_buffer)
 
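
Two caveats in the replacement block. load_state_dict returns a named tuple of missing/unexpected keys rather than the module, so agents.append(Agent(Config.n_players).load_state_dict(...)) appends that tuple, not an Agent (the old line had the same issue); relatedly, training on the next line is nn.Module's built-in mode flag, normally toggled via .train(), and is one letter away from Agent's custom trained attribute. And since scores is reset to an empty deque inside the if, the print on that same iteration divides by len(scores) == 0 and raises ZeroDivisionError. A sketch of the usual clone-then-append pattern, with nn.Linear standing in for Agent:

from collections import deque
from copy import deepcopy
from torch import nn

def clone(template: nn.Module) -> nn.Module:
    # construct first, load in a separate statement: load_state_dict
    # returns (missing_keys, unexpected_keys), not the module itself
    fresh = nn.Linear(4, 4)  # stand-in for Agent(Config.n_players)
    fresh.load_state_dict(deepcopy(template.state_dict()))
    return fresh

agents = deque([nn.Linear(4, 4)], maxlen=2)
agents.append(clone(agents[-1]))
print(isinstance(agents[-1], nn.Module))  # True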

src/moves/GrabTwoResource.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ def perform(self, game: "Game") -> "Game":
         return game
 
     def is_valid(self, game: "Game") -> bool:
-        resource = next(compress(asdict(self.resources).keys(), iter(self.resources)))
+        resource = next(compress(('red', 'green', 'blue', 'black', 'white'), iter(self.resources)))
         if getattr(game.board.resources, resource) < 4:
             return False
         return super().is_valid(game)
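
The rewrite drops the per-call dataclasses.asdict in favor of a hard-coded name tuple, presumably another stop on the speed hunt; it relies on the tuple matching the field order of self.resources. itertools.compress pairs each name with the corresponding count and yields the names whose count is truthy; next takes the first. A toy run with made-up counts:

from itertools import compress

counts = (0, 2, 0, 0, 0)  # stand-in for iter(self.resources): two green, nothing else
resource = next(compress(('red', 'green', 'blue', 'black', 'white'), counts))
print(resource)  # green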
