Commit 137fb3a3 authored by Eduard Pizur

changed main.py

parent 0837f084
# import libraries
import sys
import os
import datetime
@@ -6,8 +7,6 @@ from torch.utils.tensorboard import SummaryWriter
import numpy as np
from pathlib import Path
from PIL import Image as im
import gym
from gym.wrappers import AtariPreprocessing
from gym.wrappers import FrameStack
@@ -15,55 +14,62 @@ from gym.wrappers import Monitor
from utils.constant import *
from utils.atari_wrappers import make_env
# from utils.atari_wrapper_openai import make_atari, wrap_deepmind, wrap_pytorch
sys.path.append(os.path.abspath('../dp-2020'))
sys.path.append(os.path.abspath('../../'))
from utils.openai_baseline import make_atari, wrap_deepmind
from utils.atari_wrapper_openai import WrapPyTorch
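# A minimal sketch (an assumption, not the project's implementation in
# utils/atari_wrapper_openai.py) of what a PyTorch observation wrapper such as
# WrapPyTorch typically does: reorder Atari frames from HWC to the CHW layout
# expected by torch Conv2d layers.
class SketchWrapPyTorch(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)
        h, w, c = env.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(c, h, w), dtype=np.uint8)

    def observation(self, obs):
        # move the channel axis to the front: (H, W, C) -> (C, H, W)
        return np.moveaxis(np.asarray(obs), -1, 0)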
# load the individual agents for the different improvements
if True:
    # Vanilla deep Q-network
    from agents.deep_q_network.agent import Agent as DQNAgent
    # Double
    from agents.double_deep_q_network.agent import Agent as DDQNAgent
    # Dueling
    from agents.dueling_deep_q_network.agent import Agent as DuelDQNAgent
    from agents.dueling_double_q_network.agent import Agent as D3QNAgent
    # Noisy
    from agents.noisy_deep_q_network.agent import Agent as Noisy_DQNAgent
    from agents.noisy_dueling_deep_q_network.agent import Agent as Noisy_DuelDQNAgent
    from agents.noisy_dueling_double_q_network.agent import Agent as Noisy_D3QNAgent
    # N-step
    from agents.n_step_deep_q_network.agent import Agent as N_Step_DQNAgent
    # Distributional
    from agents.distributional_deep_q_network.agent import Agent as C51_DQNAgent
    # Rainbow
    from agents.rainbow_deep_q_network.agent import Agent as Rainbow_DQNAgent
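# A minimal sketch (assumed, not taken from the agent modules above) of the TD
# targets that the vanilla and Double DQN variants optimise; the real update
# logic lives inside each agents.*.agent.Agent class.
import torch

def sketch_td_targets(online_net, target_net, next_states, rewards, dones, gamma=0.99):
    # rewards and dones are float tensors of shape (batch,)
    with torch.no_grad():
        q_next_target = target_net(next_states)  # shape (batch, num_actions)
        # Vanilla DQN: the target network both selects and evaluates the greedy action
        dqn_target = rewards + gamma * q_next_target.max(dim=1).values * (1 - dones)
        # Double DQN: the online network selects the action, the target network evaluates it
        greedy_actions = online_net(next_states).argmax(dim=1, keepdim=True)
        ddqn_target = rewards + gamma * q_next_target.gather(1, greedy_actions).squeeze(1) * (1 - dones)
    return dqn_target, ddqn_target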
def main(short_name, full_name, agent, per):
# initialize the environment and set up Atari preprocessing wrappers
env = make_env(ENVIRONMENT)
# env = gym.make(ENVIRONMENT)
# env = AtariPreprocessing(env, noop_max=0)
# env = FrameStack(env, 4)
# env = make_atari(ENVIRONMENT)
# env = wrap_deepmind(env)
# env = wrap_pytorch(env)
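# make_env (from utils.atari_wrappers) is assumed to bundle the usual
# DeepMind-style preprocessing that the commented-out wrappers above would
# otherwise provide: no-op resets, frame skipping, grayscale 84x84 frames and a
# stack of 4 frames in channel-first (PyTorch) order.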
# choose whether a prioritized experience replay buffer is used
if per:
using_per = "using_PER"
else:
using_per = "using_RM"
run_name = "runs_f/{}/{}/{}".format(full_name,using_per,
datetime.datetime.now().strftime("%Y-%m-%d_%H-%M"))
# nastavenie Tensorboardu
run_name = "runs/{}/{}/{}".format(full_name,using_per,
datetime.datetime.now()
.strftime("%Y-%m-%d_%H-%M"))
writer = SummaryWriter(run_name)
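# Example (hypothetical date): run_name resolves to something like
# "runs/_Rainbow_deep_q_network/using_RM/2021-04-18_09-30"; the logged scalars
# can later be inspected with `tensorboard --logdir runs`.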
# set the file name used when saving a video of the training process
file_name = f"{short_name}_{using_per}"
video_path = os.path.join(
@@ -72,40 +78,41 @@ def main(short_name, full_name, agent, per):
env = Monitor(
env, f"resources/video/{file_name}", force=True)
# initialize the agent
agent = agent
# initialize the values used during the training process
best_score = 0
learn_steps = 0
avg_score = 0
scores = []
# initial values for TensorBoard
writer.add_scalar('Epsilon', agent.epsilon, learn_steps)
writer.add_scalar('Best Score', best_score, learn_steps)
writer.add_scalar('Score', best_score, learn_steps)
writer.add_scalar('Loss', agent.loss, learn_steps)
if per:
writer.add_scalar('Beta', agent.memory.beta, learn_steps)
# training process
for episode in range(NUM_OF_EPISODES):
# start of each episode
state = env.reset()
done = False
score = 0
# repeat until the agent dies (end of episode)
while not done:
action = agent.choose_action(state)
next_state, reward, done, info = env.step(action)
# update the agent's replay memory
experience = (state, action, next_state, reward, done)
agent.append_experience(experience)
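# The experience tuple is stored as (state, action, next_state, reward, done);
# with PER, new transitions are typically inserted with the current maximum
# priority so they are replayed at least once (an assumption about the memory
# implementation, which lives in the agent's replay buffer class).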
# if the agent has enough experiences in memory, run a training step
if agent.is_train_process_possible():
agent.train()
agent.decay_epsilon()
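# decay_epsilon is assumed to move agent.epsilon one step towards agent.eps_min
# (e.g. epsilon = max(eps_min, epsilon * eps_decay)), so exploration shrinks a
# little after every training step.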
@@ -114,34 +121,37 @@ def main(short_name, full_name, agent, per):
state = next_state
learn_steps += 1
# every n steps, update the weights of the target network
if learn_steps % TARGET_NET_UPDATE == 0:
agent.replace_weights()
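# replace_weights is assumed to copy the online network's parameters into the
# target network, e.g. target_net.load_state_dict(online_net.state_dict()),
# which keeps the bootstrapped TD targets stable between updates.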
# save the model at selected episodes
if episode in [200, 1000, 2000, 3000]:
agent.save_model(str(episode))
# save the model when the best average score is reached
if agent.epsilon == agent.eps_min:
if avg_score >= best_score:
agent.save_model("best_model")
# keep the best average score seen so far
if episode >= 400:
best_score = avg_score if best_score < avg_score else best_score
scores.append(score)
avg_score = np.mean(scores[-50:])
# progress printout during training
print('episode: ', episode, 'score: ', score,
' average score %.1f' % avg_score,
'best score %.2f' % best_score,
'epsilon %.2f' % agent.epsilon, 'steps', learn_steps)
writer.add_scalar('Epsilon', agent.epsilon, learn_steps)
writer.add_scalar('Best Score', best_score, learn_steps)
writer.add_scalar('Score', score, learn_steps)
writer.add_scalar('Loss', agent.loss, learn_steps)
if per:
writer.add_scalar('Beta', agent.memory.beta, learn_steps)
@@ -153,58 +163,73 @@ def main(short_name, full_name, agent, per):
if __name__ == "__main__":
# combinations of (method, PER) to train
combinations = [
# (1, False), # DQN RM
# (2, True), # DDQN Priority
# (2, False), # DDQN RM
# (4, False), # Dueling Double DQN RM
# (5, False), # Noisy DQN RM
# (9, False), # C51 DQN RM
# (10, True), # Rainbow PER
(10, False) # Rainbow RM
]
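# Each tuple is (key into the `network` dict below, use prioritized replay?);
# e.g. (10, False) trains the Rainbow agent with a plain replay memory.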
# names for the individual algorithms
network = {
1: {
"full_name": "_ddeep_q_network",
"short_name": "__DQN",
"full_name": "_deep_q_network",
"short_name": "_DQN",
"agent": DQNAgent
},
2: {
"full_name": "_ddouble_deep_q_network",
"short_name": "__DDQN",
"full_name": "_double_deep_q_network",
"short_name": "_DDQN",
"agent": DDQNAgent
},
3: {
"full_name": "_dueling_deep_q_network",
"short_name": "_DuelingDQN",
"agent": DuelDQNAgent
},
4: {
"full_name": "_dueling_double_q_network",
"short_name": "_D3QN",
"agent": D3QNAgent
},
5: {
"full_name": "_nnoisy_deep_q_network",
"short_name": "_NNoisy_DQN",
"full_name": "_noisy_deep_q_network",
"short_name": "_Noisy_DQN",
"agent": Noisy_DQNAgent
},
6: {
"full_name": "_noisy_dueling_deep_q_network",
"short_name": "_Noisy_DuelingDQN",
"agent": Noisy_DuelDQNAgent
},
7: {
"full_name": "_noisy_dueling_double_deep_q_network",
"short_name": "_Noisy_D3QN",
"agent": Noisy_D3QNAgent
},
8: {
"full_name": "_n_step_deep_q_network",
"short_name": "_N_step_DQN",
"agent": N_Step_DQNAgent
},
9: {
"full_name": "__CC51_deep_q_network",
"short_name": "_CC51_DQN",
"full_name": "_C51_deep_q_network",
"short_name": "_C51_DQN",
"agent": C51_DQNAgent
},
10: {
"full_name": "_Rainbow_deep_q_network",
"short_name": "_Rainbow_DQN",
"agent": Rainbow_DQNAgent
}
}
# train the agent for every selected combination
for combination in combinations:
selected_network = combination[0]
per = combination[1]
@@ -213,7 +238,7 @@ if __name__ == "__main__":
full_name = network[selected_network]["full_name"]
agent = network[selected_network]["agent"](per)
# announce the start of the training process
print("Init training process with these parameters:")
print("method: ", full_name)
print("using Prioritized Experience replay in Agent: ", per)
@@ -222,6 +247,7 @@ if __name__ == "__main__":
avg_score, best_score, learning_steps = main(
short_name, full_name, agent, per)
# save the training results to a text file
with open("resultsContinueLast.txt", "a") as f:
today = datetime.date.today().strftime("%d/%m/%y")
f.write(f"Started training: {today}\n")
@@ -239,4 +265,4 @@ if __name__ == "__main__":
f.write(f"Average score: {avg_score}\n")
f.write(f"Best score: {best_score}\n")
f.write("="*23)
f.write("\n")
f.write("\n")
\ No newline at end of file