# Library imports.
import sys
import os
import datetime

from torch.utils.tensorboard import SummaryWriter
import numpy as np
from pathlib import Path

import gym
from gym.wrappers import AtariPreprocessing
from gym.wrappers import FrameStack
from gym.wrappers import Monitor

from utils.constant import *
from utils.atari_wrappers import make_env
# from utils.atari_wrapper_openai import make_atari, wrap_deepmind,
#                                          wrap_pytorch

# Extend the module search path; the project imports below run after this.
sys.path.append(os.path.abspath('../dp-2020'))
sys.path.append(os.path.abspath('../../'))

from utils.openai_baseline import make_atari, wrap_deepmind
from utils.atari_wrapper_openai import WrapPyTorch

# Load the individual agents for the different DQN improvements.
# NOTE(review): the `if True:` wrapper presumably stops import sorters from
# hoisting these imports above the sys.path changes -- confirm before removing.
if True:
    from agents.deep_q_network.agent import Agent as DQNAgent
    # Double
    from agents.double_deep_q_network.agent import Agent as DDQNAgent
    # Dueling
    from agents.dueling_deep_q_network.agent import Agent as DuelDQNAgent
    from agents.dueling_double_q_network.agent import Agent as D3QNAgent
    # Noisy
    from agents.noisy_deep_q_network.agent import Agent as Noisy_DQNAgent
    from agents.noisy_dueling_deep_q_network.agent import (
        Agent as Noisy_DuelDQNAgent,
    )
    from agents.noisy_dueling_double_q_network.agent import (
        Agent as Noisy_D3QNAgent,
    )

    # Distributional
    from agents.distributional_deep_q_network.agent import (
        Agent as C51_DQNAgent,
    )

    # Rainbow
    from agents.rainbow_deep_q_network.agent import Agent as Rainbow_DQNAgent


def _log_training_scalars(writer, agent, best_score, score, step, per):
    """Write the tracked training metrics to TensorBoard at ``step``.

    Logs the agent's epsilon, the best score and the given score; when
    ``per`` is truthy it also logs ``agent.memory.beta``.
    """
    writer.add_scalar('Epsilon', agent.epsilon, step)
    writer.add_scalar('Best Score', best_score, step)
    writer.add_scalar('Score', score, step)
    if per:
        writer.add_scalar('Beta', agent.memory.beta, step)


def main(short_name, full_name, agent, per):
    """Train ``agent`` on ``ENVIRONMENT`` for ``NUM_OF_EPISODES`` episodes.

    Args:
        short_name: Prefix of the directory name used for recorded videos.
        full_name: Method name used in the TensorBoard run directory.
        agent: Agent object to train. It must provide ``choose_action``,
            ``append_experience``, ``is_train_process_possible``, ``train``,
            ``decay_epsilon``, ``replace_weights`` and ``save_model``, plus
            the attributes ``epsilon`` and ``eps_min`` (and ``memory.beta``
            when ``per`` is set).
        per: Whether prioritized experience replay is used; selects the
            run/video sub-directory name and enables logging of beta.

    Returns:
        Tuple ``(avg_score, best_score, learn_steps)``: the mean score of
        the last 50 episodes, the best such mean recorded from episode 400
        onwards, and the total number of environment steps taken.
    """
    checkpoint_episodes = (200, 1000, 2000, 3000)
    avg_window = 50          # episodes in the moving average
    best_score_warmup = 400  # first episode that may update best_score

    # Create the environment and set up preprocessing.
    env = gym.make(ENVIRONMENT)
    env = wrap_deepmind(env)
    env = WrapPyTorch(env)

    # Label the run by the kind of replay memory in use.
    using_per = "using_PER" if per else "using_RM"

    # TensorBoard setup.
    run_name = "runs/{}/{}/{}".format(
        full_name, using_per,
        datetime.datetime.now().strftime("%Y-%m-%d_%H-%M"))
    writer = SummaryWriter(run_name)

    # Directory for the videos recorded during training.
    file_name = f"{short_name}_{using_per}"
    video_path = os.path.join(
        os.getcwd(), f"resources/video/{file_name}")
    Path(video_path).mkdir(parents=True, exist_ok=True)
    env = Monitor(env, video_path, force=True)

    # Values tracked across the training process.
    best_score = 0
    learn_steps = 0
    avg_score = 0
    scores = []

    try:
        # Initial values for TensorBoard.
        _log_training_scalars(
            writer, agent, best_score, best_score, learn_steps, per)

        # Training process.
        for episode in range(NUM_OF_EPISODES):
            # Start of the episode.
            state = env.reset()
            done = False
            score = 0

            # Repeat until the episode is done.
            while not done:
                action = agent.choose_action(state)
                next_state, reward, done, info = env.step(action)

                # Store the transition in the agent's replay memory.
                experience = (state, action, next_state, reward, done)
                agent.append_experience(experience)

                # Train only once the agent reports training is possible.
                if agent.is_train_process_possible():
                    agent.train()
                    agent.decay_epsilon()

                score += reward
                state = next_state
                learn_steps += 1

                # Every TARGET_NET_UPDATE steps refresh the target network.
                if learn_steps % TARGET_NET_UPDATE == 0:
                    agent.replace_weights()

            # Update the moving average first so the checks below see the
            # episode that has just finished rather than a stale value.
            scores.append(score)
            avg_score = np.mean(scores[-avg_window:])

            # Save the model at fixed episodes.
            if episode in checkpoint_episodes:
                agent.save_model(str(episode))

            # Save the model when it reaches the best average score; `<=`
            # avoids depending on exact float equality with eps_min.
            if agent.epsilon <= agent.eps_min and avg_score >= best_score:
                agent.save_model("best_model")

            # Track the best average score after the warm-up episodes.
            if episode >= best_score_warmup:
                best_score = max(best_score, avg_score)

            # Progress output during training.
            print('episode: ', episode, 'score: ', score,
                  ' average score %.1f' % avg_score,
                  'best score %.2f' % best_score,
                  'epsilon %.2f' % agent.epsilon, 'steps', learn_steps)

            _log_training_scalars(
                writer, agent, best_score, score, learn_steps, per)
    finally:
        # Release the writer and the environment even if training is
        # interrupted or raises.
        writer.close()
        env.close()

    return avg_score, best_score, learn_steps


if __name__ == "__main__":
    # Algorithm combinations to train: (network id, use PER).
    combinations = [
        # (1, False), #DQN RM
        # (2, True), #DDQN Priority
        # (2, False),  # DDQN RM
        # (4, False), #Dueling Double DQN RM
        # (5, False), #Noisy DQN RM
        # (9, False), # C51 DQN RM
        # (10, True), #rainbow
        (10, False)  # rainbow
    ]

    # Names and agent classes for the individual algorithms.
    # NOTE(review): id 8 (N-step DQN, "_n_step_deep_q_network" /
    # "_N_step_DQN") is left out because it referenced N_Step_DQNAgent,
    # which this file never imports, so building the dict raised NameError.
    # Re-add the entry together with the matching import.
    network = {
        1: {
            "full_name": "_deep_q_network",
            "short_name": "_DQN",
            "agent": DQNAgent
        },
        2: {
            "full_name": "_double_deep_q_network",
            "short_name": "_DDQN",
            "agent": DDQNAgent
        },
        3: {
            "full_name": "_dueling_deep_q_network",
            "short_name": "_DuelingDQN",
            "agent": DuelDQNAgent
        },
        4: {
            "full_name": "_dueling_double_q_network",
            "short_name": "_D3QN",
            "agent": D3QNAgent
        },
        5: {
            "full_name": "_noisy_deep_q_network",
            "short_name": "_Noisy_DQN",
            "agent": Noisy_DQNAgent
        },
        6: {
            "full_name": "_noisy_dueling_deep_q_network",
            "short_name": "_Noisy_DuelingDQN",
            "agent": Noisy_DuelDQNAgent
        },
        7: {
            "full_name": "_noisy_dueling_double_deep_q_network",
            "short_name": "_Noisy_D3QN",
            "agent": Noisy_D3QNAgent
        },
        9: {
            "full_name": "_C51_deep_q_network",
            "short_name": "_C51_DQN",
            "agent": C51_DQNAgent
        },
        10: {
            "full_name": "_Rainbow_deep_q_network",
            "short_name": "_Rainbow_DQN",
            "agent": Rainbow_DQNAgent
        },
    }

    # Train an agent for every selected combination.
    for selected_network, per in combinations:
        config = network[selected_network]
        short_name = config["short_name"]
        full_name = config["full_name"]
        agent = config["agent"](per)

        # Announce the start of the training process.
        print("Init training process with these parameters:")
        print("method: ", full_name)
        print("using Prioritized Experience replay in Agent: ", per)

        # Capture the date before training so the "Started training" line
        # in the report is the start date, not the date training finished.
        today = datetime.date.today().strftime("%d/%m/%y")

        # train
        avg_score, best_score, learning_steps = main(
            short_name, full_name, agent, per)

        # Append the training result to the text report.
        separator = "=" * 23 + "\n"
        with open("resultsContinueLast.txt", "a") as f:
            f.writelines([
                f"Started training: {today}\n",
                "Constants used in training\n",
                f"LR_Rate: {LEARNING_RATE}\n",
                f"Gamma: {DISCOUNT_FACTOR}\n",
                separator,
                separator,
                f"Method: {full_name}\n",
                f"Agent using PER: {per}\n",
                "Results:\n",
                f"Learn steps: {learning_steps}\n",
                f"Average score: {avg_score}\n",
                f"Best score: {best_score}\n",
                separator,
            ])