Commit d098b029 authored by Ladislav Hlatky

Update dqn-agent.py

parent 289c98ce
@@ -4,6 +4,7 @@ import pandas as pd
from collections import deque
import random
import keras
import matplotlib.pyplot as plt
import time
import os
@@ -18,7 +19,7 @@ from keras.losses import mean_squared_error
from keras.models import load_model
from collections import deque
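# figure with five stacked axes for live training diagnostics: episode reward, mean reward over the last 100 episodes, steps, mean loss and epsilon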
fig, (ax_episode_reward, ax_last_mean_reward, ax_steps, ax_mean_loss, ax_epsilon) = plt.subplots(5, figsize=(8,9), dpi=80)
# access to the loss function values
class LossHistory(keras.callbacks.Callback):
def on_train_begin(self, logs={}):
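# (the rest of the callback is outside this hunk; the usual Keras pattern initialises self.losses here and appends logs.get('loss') in on_batch_end)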
@@ -40,6 +41,14 @@ class DQN:
self.epsilon = epsilon
self.epsilon_decay = epsilon_decay
self.rewards_list = []
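# per-episode statistics collected for the live plots and the text-file logs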
self.last_rewards_means = []
self.epsilon_values = []
self.steps_list = []
self.loss_means = []
self.loss_callback = LossHistory()
self.replay_memory_buffer = deque(maxlen=memory_size)
@@ -54,7 +63,6 @@ class DQN:
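# Q-network: fully connected hidden layers with ReLU activations and a linear output over the action space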
model = Sequential()
model.add(Dense(512, input_dim=self.num_observation_space, activation=relu))
model.add(Dense(256, activation=relu))
model.add(Dense(256, activation=relu))
model.add(Dense(self.num_action_space, activation=linear))
# compile the model
@@ -135,6 +143,20 @@ class DQN:
print(tests_num_of_ep_to_solve)
print(tests_duration_when_solved)
#fig.subplots_adjust(top=0.90)
ax_episode_reward.set_ylabel("reward for episode",fontsize=12)
ax_last_mean_reward.set_ylabel("mean reward for\nlast 100 episodes",fontsize=12)
ax_steps.set_ylabel("steps for episode",fontsize=12)
ax_mean_loss.set_ylabel("mean loss\nfor episode",fontsize=12)
ax_epsilon.set_ylabel("epsilon",fontsize=12)
ax_epsilon.set_xlabel("episode",fontsize=12)
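# interactive mode lets the figure redraw during training without blocking on plt.show()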
plt.ion()
plt.grid()
fig.tight_layout()
plt.pause(0.001)
for episode in range(num_episodes):
state = env.reset()
reward_for_episode = 0
@@ -162,8 +184,14 @@ class DQN:
self.epsilon *= self.epsilon_decay
self.rewards_list.append(reward_for_episode)
self.steps_list.append(step)
self.epsilon_values.append(self.epsilon)
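# moving average over the last 100 episode rewards, written to the log file and shown in the progress printout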
last_rewards_mean = np.mean(self.rewards_list[-100:])
self.last_rewards_means.append(last_rewards_mean)
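# append this episode's reward, epsilon and running average to the per-run log files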
f_rewards_per_episode.write('%d\n'%reward_for_episode)
f_epsilons_per_episode.write('%f\n' % self.epsilon)
f_avg_rewards_per_episode.write('%f\n' % last_rewards_mean)
@@ -180,6 +208,31 @@ class DQN:
print("DQN Training Complete...")
can_stop = False
# break
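# every second episode: clear the axes, restore the labels and redraw all five plots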
if episode % 2 == 0:
ax_episode_reward.clear()
ax_last_mean_reward.clear()
ax_steps.clear()
ax_mean_loss.clear()
ax_epsilon.clear()
ax_episode_reward.set_ylabel("reward for episode",fontsize=12)
ax_last_mean_reward.set_ylabel("mean reward for\nlast 100 episodes",fontsize=12)
ax_steps.set_ylabel("steps for episode",fontsize=12)
ax_mean_loss.set_ylabel("mean loss\nfor episode",fontsize=12)
ax_epsilon.set_ylabel("epsilon",fontsize=12)
ax_epsilon.set_xlabel("episode",fontsize=12)
ax_episode_reward.plot(self.rewards_list)
ax_last_mean_reward.plot(self.last_rewards_means)
ax_steps.plot(self.steps_list)
ax_mean_loss.plot(loss_values_per_step)
ax_epsilon.plot(self.epsilon_values)
plt.pause(0.001)
plt.show()
print(episode, "\t: Episode || Reward: ",reward_for_episode, "\t|| Average Reward: ",last_rewards_mean, "\t epsilon: ", self.epsilon )
test_end = int(time.time())
@@ -256,6 +309,6 @@ if __name__ == '__main__':
# create the report at the start of training
f_report = open(test_directory_name + "/testing_report.txt", "w+")
create_report(f_report, solver.model, lr, epsilon, epsilon_decay, gamma, training_episodes, number_of_tests, number_of_steps,epsilon_min)
solver.train(number_of_tests, training_episodes, number_of_steps)