train.py
# Train a DQN agent to play Minesweeper.
from DQNAgent import *

import os
import pickle
import warnings

import numpy as np
from tqdm import tqdm

warnings.filterwarnings('ignore')

AGG_STATS_EVERY = 100      # aggregate stats every 100 games for TensorBoard
SAVE_MODEL_EVERY = 10_000  # save the model and replay memory every 10,000 episodes
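
# NOTE (assumption): `Board`, `DQNAgent`, and the hyperparameters used below
# (`episodes`, `MEM_SIZE_MIN`, `MODEL_NAME`) are not defined in this file and
# are expected to arrive via the star import from DQNAgent above. The agent is
# assumed to expose get_action(), update_replay_memory(), train(), a
# replay_memory deque, a Keras `model`, and a `tensorboard` callback.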


def main():
    # Initialize the board (a 9x9 grid).
    env = Board(9, 9)

    # Place the mines around a random coordinate, treating that coordinate as
    # the user's first click (so the first tile dug is never a mine).
    f_row, f_col = np.random.randint(env.rows, size=2)
    print("First row: %d, First col: %d" % (f_row, f_col))
    env.set_mines_about(f_row, f_col, 10)  # set_mines_about(row_center, col_center, num_mines)

    # Debugging helpers:
    # env.printMines()
    # env.printBoard()

    state_im = env.board3D()  # board is 2D; board3D() reshapes it to (rows, cols, 1)
    agent = DQNAgent(env)

    progress_list, wins_list, ep_rewards = [], [], []
    n_clicks = 0
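
    # Training loop: play `episodes` games. Within each game the agent digs
    # tiles until it wins or hits a mine, storing every transition in replay
    # memory and training on a sampled minibatch after each step.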
    for episode in tqdm(range(1, episodes + 1), unit='episode'):
        agent.tensorboard.step = episode

        # Reset the board and place a fresh set of mines for this episode.
        env.reset()
        f_row, f_col = np.random.randint(env.rows, size=2)
        env.set_mines_about(f_row, f_col, 10)

        done = False
        ep_reward = 0
        past_n_wins = env.n_wins  # used to detect a win after the episode

        # Play until the game ends (win or loss).
        while not done:
            current_state = env.board3D()
            action = agent.get_action(current_state)

            # Decode the flat action index into (row, col) board coordinates,
            # dig there, and observe the next state and reward.
            new_state, reward, done = env.dig(action // env.cols, action % env.cols)
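            # Worked example (hypothetical numbers): on a 9x9 board, the flat
            # index 31 decodes to row 31 // 9 = 3, col 31 % 9 = 4.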
            # print("\nREWARD: ", reward)  # debugging helper
            ep_reward += reward

            # Store the transition and train on a sampled minibatch.
            agent.update_replay_memory((current_state, action, reward, new_state, done))
            agent.train(done)
            n_clicks += 1

        progress_list.append(env.n_progress)  # number of non-guess moves this episode
        ep_rewards.append(ep_reward)
        # print("Number of Wins :", env.n_wins)  # debugging helper
        wins_list.append(1 if env.n_wins > past_n_wins else 0)

        # Skip logging and checkpointing until the replay memory holds at
        # least MEM_SIZE_MIN transitions, i.e. until training has started.
        if len(agent.replay_memory) < MEM_SIZE_MIN:
            continue

        # Aggregate stats over the last AGG_STATS_EVERY episodes.
        if not episode % AGG_STATS_EVERY:
            med_progress = round(np.median(progress_list[-AGG_STATS_EVERY:]), 2)
            win_rate = round(np.sum(wins_list[-AGG_STATS_EVERY:]) / AGG_STATS_EVERY, 2)
            med_reward = round(np.median(ep_rewards[-AGG_STATS_EVERY:]), 2)
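            # NOTE (assumption): update_stats() is taken to be a method on a
            # custom TensorBoard wrapper (a ModifiedTensorBoard-style callback
            # common in Keras DQN code) that logs each keyword argument as a
            # scalar; it is not part of the stock Keras TensorBoard callback.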
            agent.tensorboard.update_stats(
                progress_med=med_progress,
                winrate=win_rate,
                reward_med=med_reward,
                learn_rate=agent.learn_rate,
                epsilon=agent.epsilon)
            print(f'Episode: {episode}, Median progress: {med_progress}, '
                  f'Median reward: {med_reward}, Win rate: {win_rate}')

        # Periodically checkpoint the model and the replay memory.
        if not episode % SAVE_MODEL_EVERY:
            os.makedirs('replay', exist_ok=True)  # make sure the output dirs exist
            os.makedirs('models', exist_ok=True)
            with open(f'replay/{MODEL_NAME}.pkl', 'wb') as output:
                pickle.dump(agent.replay_memory, output)
            agent.model.save(f'models/{MODEL_NAME}.h5')

    print("Number of wins:", env.n_wins)


if __name__ == "__main__":
    main()
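
# Usage sketch (assumes DQNAgent.py defines Board, DQNAgent, `episodes`,
# `MEM_SIZE_MIN`, and `MODEL_NAME`):
#   $ python train.py
# Checkpoints are written to models/<MODEL_NAME>.h5 and replay/<MODEL_NAME>.pkl.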