Commit 137fb3a3 authored by Eduard Pizur

changed main.py

parent 0837f084
# import libraries
import sys
import os
import datetime
@@ -6,8 +7,6 @@ from torch.utils.tensorboard import SummaryWriter
import numpy as np
from pathlib import Path
from PIL import Image as im
import gym
from gym.wrappers import AtariPreprocessing
from gym.wrappers import FrameStack
@@ -15,55 +14,62 @@ from gym.wrappers import Monitor
from utils.constant import *
from utils.atari_wrappers import make_env
# from utils.atari_wrapper_openai import make_atari, wrap_deepmind, wrap_pytorch
sys.path.append(os.path.abspath('../dp-2020'))
sys.path.append(os.path.abspath('../../'))
from utils.openai_baseline import make_atari, wrap_deepmind
from utils.atari_wrapper_openai import WrapPyTorch
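# Note: WrapPyTorch is assumed here to be the usual observation wrapper that
# transposes frames from HWC to CHW for PyTorch convolutions. A minimal sketch
# of such a wrapper (hypothetical, for illustration only; the real class lives
# in utils.atari_wrapper_openai):
#
#   class WrapPyTorch(gym.ObservationWrapper):
#       def __init__(self, env):
#           super().__init__(env)
#           h, w, c = self.observation_space.shape
#           self.observation_space = gym.spaces.Box(
#               low=0.0, high=1.0, shape=(c, h, w), dtype=np.float32)
#
#       def observation(self, observation):
#           # move the channel axis to the front: HWC -> CHW
#           return np.transpose(observation, (2, 0, 1))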
# load the individual agents for the different improvements
if True:
    # Vanilla deep q network
    from agents.deep_q_network.agent import Agent as DQNAgent
    # Double
    from agents.double_deep_q_network.agent import Agent as DDQNAgent
    # Dueling
    from agents.dueling_deep_q_network.agent import Agent as DuelDQNAgent
    from agents.dueling_double_q_network.agent import Agent as D3QNAgent
    # Noisy
    from agents.noisy_deep_q_network.agent import Agent as Noisy_DQNAgent
    from agents.noisy_dueling_deep_q_network.agent import Agent as Noisy_DuelDQNAgent
    from agents.noisy_dueling_double_q_network.agent import Agent as Noisy_D3QNAgent
    # N step
    from agents.n_step_deep_q_network.agent import Agent as N_Step_DQNAgent
    # Distributional
    from agents.distributional_deep_q_network.agent import Agent as C51_DQNAgent
    # Rainbow
    from agents.rainbow_deep_q_network.agent import Agent as Rainbow_DQNAgent

def main(short_name, full_name, agent, per):
    # initialize the environment and set up preprocessing
    env = gym.make(ENVIRONMENT)
    env = wrap_deepmind(env)
    env = WrapPyTorch(env)
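    # (wrap_deepmind comes from the OpenAI Baselines-style helpers imported
    # above; it is assumed to apply the standard DeepMind Atari preprocessing,
    # e.g. 84x84 grayscale frames, frame stacking and reward clipping)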

    # set whether the prioritized replay buffer is used
    if per:
        using_per = "using_PER"
    else:
        using_per = "using_RM"
    # set up TensorBoard
    run_name = "runs/{}/{}/{}".format(full_name, using_per,
                                      datetime.datetime.now().strftime("%Y-%m-%d_%H-%M"))
    writer = SummaryWriter(run_name)
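    # each run therefore logs to its own directory, e.g. (hypothetical
    # timestamp): runs/_rainbow_deep_q_network/using_RM/2021-04-01_12-30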
    # set the file name for saving a video of the training process
    file_name = f"{short_name}_{using_per}"
    video_path = os.path.join(
@@ -72,40 +78,41 @@ def main(short_name, full_name, agent, per):
    env = Monitor(
        env, f"resources/video/{file_name}", force=True)
    # initialize the agent
    agent = agent
    # initialize the values used during the training process
    best_score = 0
    learn_steps = 0
    avg_score = 0
    scores = []
    # initial values for TensorBoard
    writer.add_scalar('Epsilon', agent.epsilon, learn_steps)
    writer.add_scalar('Best Score', best_score, learn_steps)
    writer.add_scalar('Score', best_score, learn_steps)
    if per:
        writer.add_scalar('Beta', agent.memory.beta, learn_steps)
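    # (beta here is assumed to be PER's importance-sampling exponent, which
    # is typically annealed toward 1.0 as training progresses)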

    # training process
    for episode in range(NUM_OF_EPISODES):
        # start of an episode
        state = env.reset()
        done = False
        score = 0
        # repeat until the agent dies
        while not done:
            action = agent.choose_action(state)
            next_state, reward, done, info = env.step(action)
            # update the agent's memory
            experience = (state, action, next_state, reward, done)
            agent.append_experience(experience)
            # if the agent has enough experiences in memory, run a
            # training step
            if agent.is_train_process_possible():
                agent.train()
                agent.decay_epsilon()
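                # decay_epsilon is assumed to anneal the exploration rate
                # toward agent.eps_min, e.g. (sketch, not the actual code):
                #   self.epsilon = max(self.eps_min, self.epsilon - self.eps_decay)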
@@ -114,34 +121,37 @@ def main(short_name, full_name, agent, per):
            state = next_state
            learn_steps += 1
            # every n steps the agent updates the weights of the target
            # network
            if learn_steps % TARGET_NET_UPDATE == 0:
                agent.replace_weights()
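                # replace_weights presumably hard-copies the online network's
                # parameters into the target network, e.g. (sketch):
                #   self.target_net.load_state_dict(self.online_net.state_dict())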
        # save the model at selected episodes
        if episode in [200, 1000, 2000, 3000]:
            agent.save_model(str(episode))
        # save the model once it reaches the best average score
        if agent.epsilon == agent.eps_min:
            if avg_score >= best_score:
                agent.save_model("best_model")

            # keep the best score so far
            if episode >= 400:
                best_score = avg_score if best_score < avg_score else best_score
        scores.append(score)
        avg_score = np.mean(scores[-50:])
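        # avg_score is a rolling mean over the most recent 50 episode scores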
        # progress printout during training
        print('episode: ', episode, 'score: ', score,
              ' average score %.1f' % avg_score,
              'best score %.2f' % best_score,
              'epsilon %.2f' % agent.epsilon, 'steps', learn_steps)
        writer.add_scalar('Epsilon', agent.epsilon, learn_steps)
        writer.add_scalar('Best Score', best_score, learn_steps)
        writer.add_scalar('Score', score, learn_steps)
        if per:
            writer.add_scalar('Beta', agent.memory.beta, learn_steps)
@@ -153,58 +163,73 @@ def main(short_name, full_name, agent, per):

if __name__ == "__main__":
    # combinations of algorithms for training
    combinations = [
        # (1, False),  # DQN RM
        # (2, True),   # DDQN Priority
        # (2, False),  # DDQN RM
        # (4, False),  # Dueling Double DQN RM
        # (5, False),  # Noisy DQN RM
        # (9, False),  # C51 DQN RM
        # (10, True),  # rainbow
        (10, False)  # rainbow
    ]
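    # each tuple is (network id from the dict below, use-PER flag), so
    # (10, False) trains the Rainbow agent with a plain replay memory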
    # names of the individual algorithms
    network = {
        1: {
            "full_name": "_deep_q_network",
            "short_name": "_DQN",
            "agent": DQNAgent
        },
        2: {
            "full_name": "_double_deep_q_network",
            "short_name": "_DDQN",
            "agent": DDQNAgent
        },
        3: {
            "full_name": "_dueling_deep_q_network",
            "short_name": "_DuelingDQN",
            "agent": DuelDQNAgent
        },
        4: {
            "full_name": "_dueling_double_q_network",
            "short_name": "_D3QN",
            "agent": D3QNAgent
        },
        5: {
            "full_name": "_noisy_deep_q_network",
            "short_name": "_Noisy_DQN",
            "agent": Noisy_DQNAgent
        },
        6: {
            "full_name": "_noisy_dueling_deep_q_network",
            "short_name": "_Noisy_DuelingDQN",
            "agent": Noisy_DuelDQNAgent
        },
        7: {
            "full_name": "_noisy_dueling_double_deep_q_network",
            "short_name": "_Noisy_D3QN",
            "agent": Noisy_D3QNAgent
        },
        8: {
            "full_name": "_n_step_deep_q_network",
            "short_name": "_N_step_DQN",
            "agent": N_Step_DQNAgent
        },
        9: {
            "full_name": "_C51_deep_q_network",
            "short_name": "_C51_DQN",
            "agent": C51_DQNAgent
        },
        10: {
            "full_name": "_Rainbow_deep_q_network",
            "short_name": "_Rainbow_DQN",
            "agent": Rainbow_DQNAgent
        }
    }
    # train an agent for every selected combination
    for combination in combinations:
        selected_network = combination[0]
        per = combination[1]
@@ -213,7 +238,7 @@ if __name__ == "__main__":
        full_name = network[selected_network]["full_name"]
        agent = network[selected_network]["agent"](per)
        # announce the start of the training process
print("Init training process with these parameters:") print("Init training process with these parameters:")
print("method: ", full_name) print("method: ", full_name)
print("using Prioritized Experience replay in Agent: ", per) print("using Prioritized Experience replay in Agent: ", per)
@@ -222,6 +247,7 @@ if __name__ == "__main__":
        avg_score, best_score, learning_steps = main(
            short_name, full_name, agent, per)
        # save the training results to a text file
        with open("resultsContinueLast.txt", "a") as f:
            today = datetime.date.today().strftime("%d/%m/%y")
            f.write(f"Started training: {today}\n")
@@ -239,4 +265,4 @@ if __name__ == "__main__":
            f.write(f"Average score: {avg_score}\n")
            f.write(f"Best score: {best_score}\n")
            f.write("="*23)
            f.write("\n")