Commit ba63c457 authored by Ladislav Hlatky

Update dqn-test3.py

parent f23ee2f6
@@ -54,6 +54,7 @@ class DQN:
         model = Sequential()
         model.add(Dense(512, input_dim=self.num_observation_space, activation=relu))
         model.add(Dense(256, activation=relu))
+        model.add(Dense(256, activation=relu))
         model.add(Dense(self.num_action_space, activation=linear))
         # model compilation
@@ -108,7 +109,7 @@ class DQN:
         random_sample = random.sample(self.replay_memory_buffer, self.batch_size)
         return random_sample
-    def train(self, number_of_tests, num_episodes=50, number_of_steps=1000, can_stop=True):
+    def train(self, number_of_tests, num_episodes=600, number_of_steps=1000):
         tests_duration = []
         tests_num_of_ep_to_solve = np.full(number_of_tests, -1)
@@ -120,7 +121,6 @@ class DQN:
         for test in range(number_of_tests):
             test_start = int(time.time())
-            rewards_per_episode = []
             f_rewards_per_episode = open("./" + test_directory_name + "/" + str(test) + "_rewards.txt", "a", buffering=1)
             f_epsilons_per_episode = open("./" + test_directory_name + "/" + str(test) + "_epsilons.txt", "a", buffering=1)
@@ -131,6 +131,7 @@ class DQN:
             episode = 0
             step = 0
             loss_history = 0
+            can_stop=True
             print(tests_num_of_ep_to_solve)
             print(tests_duration_when_solved)
@@ -171,12 +172,13 @@ class DQN:
                 f_avg_loss_values_per_episode.write(f'%f\n' % mean_loss)
-                if (last_rewards_mean >= 200 and len(rewards_per_episode) >= 100) and (not can_stop):
+                if (last_rewards_mean >= 200 and len(self.rewards_list) >= 100) and can_stop:
                     self.model.save("./" + test_directory_name + "/q_saved-model_t" + str(test))
                     self.model.save("./" + test_directory_name + "/t_saved-model_t" + str(test))
-                    self.tests_num_of_ep_to_solve[test]=episode
+                    tests_num_of_ep_to_solve[test]=episode
                     tests_duration_when_solved[test]= int(time.time())-test_start
                     print("DQN Training Complete...")
+                    can_stop = False
                     # break
                 print(episode, "\t: Episode || Reward: ",reward_for_episode, "\t|| Average Reward: ",last_rewards_mean, "\t epsilon: ", self.epsilon )
@@ -188,7 +190,7 @@ class DQN:
             f_avg_rewards_per_episode.close()
             f_steps_per_episode.close()
             f_avg_loss_values_per_episode.close()
-            f_report.write(f"Test %d\nduration:\n%d\nepisodes to LAST_100_REWARD_GOAL\n%d\n" % (test, tests_duration[test], tests_num_of_ep_to_solve[test]))
+            f_report.write(f"Test %d\nduration:\n%d\nepisodes to LAST_100_REWARD_GOAL\n%d\n\n" % (test, tests_duration[test], tests_num_of_ep_to_solve[test]))
             f_duration_in_ep.write("%d\n" % (tests_num_of_ep_to_solve[test]))
             f_duration_when_solved.write("%d\n" % (tests_duration_when_solved[test]))
@@ -238,7 +240,7 @@ if __name__ == '__main__':
     epsilon_decay = 0.995
     epsilon_min = 0.01
     gamma = 0.99
-    training_episodes = 50
+    training_episodes = 600
     number_of_steps = 1000
     batch_size = 64
     memory_size = 500000
@@ -254,6 +256,6 @@ if __name__ == '__main__':
     # create the report at the start of training
     f_report = open(test_directory_name + "/testing_report.txt", "w+")
     create_report(f_report, solver.model, lr, epsilon, epsilon_decay, gamma, training_episodes, number_of_tests, number_of_steps,epsilon_min)
-    solver.train(number_of_tests, training_episodes, number_of_steps, True)
+    solver.train(number_of_tests, training_episodes, number_of_steps)
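The core behaviour touched by this diff is the per-test solve check: `can_stop` is now a local flag reset to `True` at the start of every test, and the first time the average of the recent rewards reaches 200 (with at least 100 episodes recorded) the models are saved, the episode index and elapsed time are stored, and `can_stop` is set to `False` so the check fires only once while training continues. Below is a minimal, self-contained sketch of that pattern under stated assumptions: `run_episode`, `SOLVED_REWARD`, `WINDOW` and `train_one_test` are illustrative names, and the random reward generator merely stands in for real agent-environment interaction; none of this is part of dqn-test3.py.

```python
import time
import numpy as np

# Illustrative constants: the script above checks a mean reward of 200 over the last 100 episodes.
SOLVED_REWARD = 200
WINDOW = 100

def run_episode(rng):
    # Stand-in for one episode of agent-environment interaction; returns a fake reward.
    return rng.normal(loc=210.0, scale=60.0)

def train_one_test(num_episodes=600, seed=0):
    rng = np.random.default_rng(seed)
    rewards = []
    can_stop = True                    # reset for every test, as in the commit
    test_start = int(time.time())
    episodes_to_solve = -1             # -1 means "goal never reached", mirroring np.full(..., -1)
    duration_when_solved = -1
    for episode in range(num_episodes):
        rewards.append(run_episode(rng))
        last_mean = np.mean(rewards[-WINDOW:])
        if last_mean >= SOLVED_REWARD and len(rewards) >= WINDOW and can_stop:
            episodes_to_solve = episode                        # first episode at which the goal was met
            duration_when_solved = int(time.time()) - test_start
            can_stop = False                                   # record the first solve only; training continues
    return episodes_to_solve, duration_when_solved

if __name__ == "__main__":
    print(train_one_test())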